From 2b301df4aa00cbf4f88c716bda292d0c7126ff95 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 17 Mar 2011 21:09:38 +0100 Subject: [PATCH] gallium/svga: Upload only parts of user-buffers that we actually use Stream user buffer contents rather than trying to maintain persistent host / hardware copies. Resulting negative array offsets are not allowed by the hardware, (well, at least not according to header files), so adjust index bias to make all array offsets positive. Signed-off-by: Thomas Hellstrom --- src/gallium/drivers/svga/svga_draw.c | 13 +- src/gallium/drivers/svga/svga_draw.h | 3 + src/gallium/drivers/svga/svga_draw_private.h | 7 + src/gallium/drivers/svga/svga_pipe_draw.c | 130 +++++++++++++++++- .../drivers/svga/svga_resource_buffer.h | 7 + src/gallium/drivers/svga/svga_state_vdecl.c | 119 +++++++--------- 6 files changed, 208 insertions(+), 71 deletions(-) diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index 28ba470d8c7..aa096692888 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -242,6 +242,11 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl ) } +void svga_hwtnl_set_index_bias( struct svga_hwtnl *hwtnl, + int index_bias) +{ + hwtnl->index_bias = index_bias; +} @@ -265,15 +270,16 @@ enum pipe_error svga_hwtnl_prim( struct svga_hwtnl *hwtnl, unsigned size = vb ? vb->width0 : 0; unsigned offset = hwtnl->cmd.vdecl[i].array.offset; unsigned stride = hwtnl->cmd.vdecl[i].array.stride; - unsigned index_bias = range->indexBias; + int index_bias = (int) range->indexBias + hwtnl->index_bias; unsigned width; assert(vb); assert(size); assert(offset < size); - assert(index_bias >= 0); assert(min_index <= max_index); - assert(offset + index_bias*stride < size); + if (index_bias >= 0) { + assert(offset + index_bias*stride < size); + } if (min_index != ~0) { assert(offset + (index_bias + min_index) * stride < size); } @@ -394,6 +400,7 @@ enum pipe_error svga_hwtnl_prim( struct svga_hwtnl *hwtnl, hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index; hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range; + hwtnl->cmd.prim[hwtnl->cmd.prim_count].indexBias += hwtnl->index_bias; pipe_resource_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib); hwtnl->cmd.prim_count++; diff --git a/src/gallium/drivers/svga/svga_draw.h b/src/gallium/drivers/svga/svga_draw.h index a2403d802be..1dac17421e1 100644 --- a/src/gallium/drivers/svga/svga_draw.h +++ b/src/gallium/drivers/svga/svga_draw.h @@ -79,5 +79,8 @@ svga_hwtnl_draw_range_elements( struct svga_hwtnl *hwtnl, enum pipe_error svga_hwtnl_flush( struct svga_hwtnl *hwtnl ); +void svga_hwtnl_set_index_bias( struct svga_hwtnl *hwtnl, + int index_bias); + #endif /* SVGA_DRAW_H_ */ diff --git a/src/gallium/drivers/svga/svga_draw_private.h b/src/gallium/drivers/svga/svga_draw_private.h index ca658ac6745..8126f7ee23c 100644 --- a/src/gallium/drivers/svga/svga_draw_private.h +++ b/src/gallium/drivers/svga/svga_draw_private.h @@ -116,6 +116,13 @@ struct draw_cmd { struct svga_hwtnl { struct svga_context *svga; struct u_upload_mgr *upload_ib; + + /* Additional negative index bias due to partial buffer uploads + * This is compensated for in the offset associated with all + * vertex buffers. 
+ */ + + int index_bias; /* Flatshade information: */ diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c index a632fb12c94..8e1c764ef5f 100644 --- a/src/gallium/drivers/svga/svga_pipe_draw.c +++ b/src/gallium/drivers/svga/svga_pipe_draw.c @@ -37,6 +37,116 @@ #include "svga_state.h" #include "svga_swtnl.h" #include "svga_debug.h" +#include "svga_resource_buffer.h" +#include "util/u_upload_mgr.h" + +/** + * svga_upload_user_buffers - upload parts of user buffers + * + * This function streams a part of a user buffer to hw and sets + * svga_buffer::source_offset to the first byte uploaded. After upload + * also svga_buffer::uploaded::buffer is set to !NULL + */ + +static int +svga_upload_user_buffers(struct svga_context *svga, + unsigned start, + unsigned count, + unsigned instance_count) +{ + const struct pipe_vertex_element *ve = svga->curr.velems->velem; + unsigned i; + int ret; + + for (i=0; i < svga->curr.velems->count; i++) { + struct pipe_vertex_buffer *vb = + &svga->curr.vb[ve[i].vertex_buffer_index]; + + if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) { + struct svga_buffer *buffer = svga_buffer(vb->buffer); + unsigned first, size; + boolean flushed; + unsigned instance_div = ve[i].instance_divisor; + + svga->dirty |= SVGA_NEW_VBUFFER; + + if (instance_div) { + first = 0; + size = vb->stride * + (instance_count + instance_div - 1) / instance_div; + } else if (vb->stride) { + first = vb->stride * start; + size = vb->stride * count; + } else { + /* Only a single vertex! + * Upload with the largest vertex size the hw supports, + * if possible. + */ + first = 0; + size = MIN2(16, vb->buffer->width0); + } + + ret = u_upload_buffer( svga->upload_vb, + 0, first, size, + &buffer->b.b, + &buffer->uploaded.offset, + &buffer->uploaded.buffer, + &flushed); + + if (ret) + return ret; + + if (0) + debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sofs %d" + " sz %d\n", + __FUNCTION__, + i, + buffer, + buffer->uploaded.buffer, + buffer->uploaded.offset, + first, + size); + + vb->buffer_offset = buffer->uploaded.offset; + buffer->source_offset = first; + } + } + + return PIPE_OK; +} + +/** + * svga_release_user_upl_buffers - release uploaded parts of user buffers + * + * This function releases the hw copy of the uploaded fraction of the + * user-buffer. It's important to do this as soon as all draw calls + * affecting the uploaded fraction are issued, as this allows for + * efficient reuse of the hardware surface backing the uploaded fraction. + * + * svga_buffer::source_offset is set to 0, and svga_buffer::uploaded::buffer + * is set to 0. 
+ */ + +static void +svga_release_user_upl_buffers(struct svga_context *svga) +{ + unsigned i; + unsigned nr; + + nr = svga->curr.num_vertex_buffers; + + for (i = 0; i < nr; ++i) { + struct pipe_vertex_buffer *vb = &svga->curr.vb[i]; + + if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) { + struct svga_buffer *buffer = svga_buffer(vb->buffer); + + buffer->source_offset = 0; + if (buffer->uploaded.buffer) + pipe_resource_reference(&buffer->uploaded.buffer, NULL); + } + } +} @@ -50,6 +160,7 @@ retry_draw_range_elements( struct svga_context *svga, unsigned prim, unsigned start, unsigned count, + unsigned instance_count, boolean do_retry ) { enum pipe_error ret = 0; @@ -61,6 +172,10 @@ retry_draw_range_elements( struct svga_context *svga, svga->curr.rast->templ.flatshade, svga->curr.rast->templ.flatshade_first ); + ret = svga_upload_user_buffers( svga, min_index + index_bias, + max_index - min_index + 1, instance_count ); + if (ret != PIPE_OK) + goto retry; ret = svga_update_state( svga, SVGA_STATE_HW_DRAW ); if (ret) @@ -84,7 +199,7 @@ retry: index_buffer, index_size, index_bias, min_index, max_index, prim, start, count, - FALSE ); + instance_count, FALSE ); } return ret; @@ -96,6 +211,7 @@ retry_draw_arrays( struct svga_context *svga, unsigned prim, unsigned start, unsigned count, + unsigned instance_count, boolean do_retry ) { enum pipe_error ret; @@ -107,6 +223,11 @@ retry_draw_arrays( struct svga_context *svga, svga->curr.rast->templ.flatshade, svga->curr.rast->templ.flatshade_first ); + ret = svga_upload_user_buffers( svga, start, count, instance_count ); + + if (ret != PIPE_OK) + goto retry; + ret = svga_update_state( svga, SVGA_STATE_HW_DRAW ); if (ret) goto retry; @@ -127,6 +248,7 @@ retry: prim, start, count, + instance_count, FALSE ); } @@ -183,6 +305,8 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) svga_context_flush(svga, NULL); } + /* Avoid leaking the previous hwtnl bias to swtnl */ + svga_hwtnl_set_index_bias( svga->hwtnl, 0 ); ret = svga_swtnl_draw_vbo( svga, info ); } else { @@ -201,6 +325,7 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) info->mode, info->start + offset, info->count, + info->instance_count, TRUE ); } else { @@ -208,10 +333,13 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) info->mode, info->start, info->count, + info->instance_count, TRUE ); } } + svga_release_user_upl_buffers( svga ); + if (SVGA_DEBUG & DEBUG_FLUSH) { svga_hwtnl_flush_retry( svga ); svga_context_flush(svga, NULL); diff --git a/src/gallium/drivers/svga/svga_resource_buffer.h b/src/gallium/drivers/svga/svga_resource_buffer.h index 95032213fa5..2ae44d2a5e9 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.h +++ b/src/gallium/drivers/svga/svga_resource_buffer.h @@ -131,6 +131,13 @@ struct svga_buffer unsigned offset; } uploaded; + /** + * For user buffers, this is the offset to the data about to be + * referenced by the next draw command, and hence the data that needs + * to be uploaded. + */ + unsigned source_offset; + /** * DMA'ble memory. 
* diff --git a/src/gallium/drivers/svga/svga_state_vdecl.c b/src/gallium/drivers/svga/svga_state_vdecl.c index 7c393a1da8d..2375a022f97 100644 --- a/src/gallium/drivers/svga/svga_state_vdecl.c +++ b/src/gallium/drivers/svga/svga_state_vdecl.c @@ -38,57 +38,6 @@ #include "svga_hw_reg.h" -static int -upload_user_buffers( struct svga_context *svga ) -{ - enum pipe_error ret = PIPE_OK; - int i; - int nr; - - if (0) - debug_printf("%s: %d\n", __FUNCTION__, svga->curr.num_vertex_buffers); - - nr = svga->curr.num_vertex_buffers; - - for (i = 0; i < nr; i++) - { - if (svga_buffer_is_user_buffer(svga->curr.vb[i].buffer)) - { - struct svga_buffer *buffer = svga_buffer(svga->curr.vb[i].buffer); - - if (!buffer->uploaded.buffer) { - boolean flushed; - ret = u_upload_buffer( svga->upload_vb, - 0, 0, - buffer->b.b.width0, - &buffer->b.b, - &buffer->uploaded.offset, - &buffer->uploaded.buffer, - &flushed); - if (ret) - return ret; - - if (0) - debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sz %d\n", - __FUNCTION__, - i, - buffer, - buffer->uploaded.buffer, - buffer->uploaded.offset, - buffer->b.b.width0); - } - - svga->curr.vb[i].buffer_offset = buffer->uploaded.offset; - } - } - - if (0) - debug_printf("%s: DONE\n", __FUNCTION__); - - return ret; -} - - /*********************************************************************** */ @@ -99,6 +48,7 @@ static int emit_hw_vs_vdecl( struct svga_context *svga, const struct pipe_vertex_element *ve = svga->curr.velems->velem; SVGA3dVertexDecl decl; unsigned i; + unsigned neg_bias = 0; assert(svga->curr.velems->count >= svga->curr.vs->base.info.file_count[TGSI_FILE_INPUT]); @@ -106,12 +56,50 @@ static int emit_hw_vs_vdecl( struct svga_context *svga, svga_hwtnl_reset_vdecl( svga->hwtnl, svga->curr.velems->count ); + /** + * We can't set the VDECL offset to something negative, so we + * must calculate a common negative additional index bias, and modify + * the VDECL offsets accordingly so they *all* end up positive. + * + * Note that the exact value of the negative index bias is not that + * important, since we compensate for it when we calculate the vertex + * buffer offset below. The important thing is that all vertex buffer + * offsets remain positive. + * + * Note that we use a negative bias variable in order to make the + * rounding maths more easy to follow, and to avoid int / unsigned + * confusion. 
+ */ + for (i = 0; i < svga->curr.velems->count; i++) { - const struct pipe_vertex_buffer *vb = &svga->curr.vb[ve[i].vertex_buffer_index]; + const struct pipe_vertex_buffer *vb = + &svga->curr.vb[ve[i].vertex_buffer_index]; + struct svga_buffer *buffer; + unsigned int offset = vb->buffer_offset + ve[i].src_offset; + unsigned tmp_neg_bias = 0; + + if (!vb->buffer) + continue; + + buffer = svga_buffer(vb->buffer); + if (buffer->source_offset > offset) { + tmp_neg_bias = buffer->source_offset - offset; + if (vb->stride) + tmp_neg_bias = (tmp_neg_bias + vb->stride - 1) / vb->stride; + neg_bias = MAX2(neg_bias, tmp_neg_bias); + } + } + + for (i = 0; i < svga->curr.velems->count; i++) { + const struct pipe_vertex_buffer *vb = + &svga->curr.vb[ve[i].vertex_buffer_index]; unsigned usage, index; - struct svga_buffer *buffer = svga_buffer(vb->buffer); + struct svga_buffer *buffer; + if (!vb->buffer) + continue; + buffer= svga_buffer(vb->buffer); svga_generate_vdecl_semantics( i, &usage, &index ); /* SVGA_NEW_VELEMENT @@ -121,8 +109,16 @@ static int emit_hw_vs_vdecl( struct svga_context *svga, decl.identity.usage = usage; decl.identity.usageIndex = index; decl.array.stride = vb->stride; - decl.array.offset = (vb->buffer_offset + - ve[i].src_offset); + + /* Compensate for partially uploaded vbo, and + * for the negative index bias. + */ + decl.array.offset = (vb->buffer_offset + + ve[i].src_offset + + neg_bias * vb->stride + - buffer->source_offset); + + assert(decl.array.offset >= 0); svga_hwtnl_vdecl( svga->hwtnl, i, @@ -131,6 +127,7 @@ static int emit_hw_vs_vdecl( struct svga_context *svga, vb->buffer ); } + svga_hwtnl_set_index_bias( svga->hwtnl, -neg_bias ); return 0; } @@ -138,23 +135,11 @@ static int emit_hw_vs_vdecl( struct svga_context *svga, static int emit_hw_vdecl( struct svga_context *svga, unsigned dirty ) { - int ret = 0; - /* SVGA_NEW_NEED_SWTNL */ if (svga->state.sw.need_swtnl) return 0; /* Do not emit during swtnl */ - /* If we get to here, we know that we're going to draw. Upload - * userbuffers now and try to combine multiple userbuffers from - * multiple draw calls into a single host buffer for performance. - */ - if (svga->curr.any_user_vertex_buffers) { - ret = upload_user_buffers( svga ); - if (ret) - return ret; - } - return emit_hw_vs_vdecl( svga, dirty ); } -- 2.30.2
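
Note (reviewer's illustration, not part of the patch): the offset / index-bias compensation that emit_hw_vs_vdecl() performs above is easiest to check with concrete numbers. The stand-alone C sketch below mirrors only the fields the calculation uses (pipe_vertex_buffer::buffer_offset, pipe_vertex_element::src_offset, the vertex stride and svga_buffer::source_offset, the first uploaded byte); the struct layout, the MAX2 macro and the sample values are illustrative assumptions, not driver code.

/*
 * Minimal sketch of the partial-upload offset / index-bias compensation.
 * Assumed scenario: the upload of a user buffer started 20 vertices
 * (640 bytes) into the buffer, stride 32, two vertex elements.
 */
#include <assert.h>
#include <stdio.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

struct velem_sketch {
   unsigned buffer_offset;  /* byte offset of the (uploaded) vertex buffer */
   unsigned src_offset;     /* byte offset of this element within a vertex */
   unsigned stride;         /* vertex stride in bytes */
   unsigned source_offset;  /* first user-buffer byte that was uploaded */
};

int main(void)
{
   struct velem_sketch ve[2] = {
      { 0,  0, 32, 640 },
      { 0, 16, 32, 640 },
   };
   unsigned neg_bias = 0;
   unsigned i;

   /* Pass 1: common negative index bias, in whole vertices, rounded up
    * so that every compensated array offset ends up non-negative.
    */
   for (i = 0; i < 2; i++) {
      unsigned offset = ve[i].buffer_offset + ve[i].src_offset;
      if (ve[i].source_offset > offset && ve[i].stride) {
         unsigned diff = ve[i].source_offset - offset;
         neg_bias = MAX2(neg_bias, (diff + ve[i].stride - 1) / ve[i].stride);
      }
   }

   /* Pass 2: compensate each array offset; the draw's indexBias gets
    * -neg_bias added to it (svga_hwtnl_set_index_bias), so an index
    * still addresses the bytes the application supplied.
    */
   for (i = 0; i < 2; i++) {
      int array_offset = (int)(ve[i].buffer_offset + ve[i].src_offset +
                               neg_bias * ve[i].stride - ve[i].source_offset);
      unsigned idx = 25;   /* any index at or past the first uploaded vertex */

      assert(array_offset >= 0);

      /* Hardware fetch: offset + (idx + bias) * stride, bias = -neg_bias.
       * It must equal the vertex's byte position within the uploaded data,
       * i.e. its original byte position minus source_offset.
       */
      assert(array_offset + (int)(idx - neg_bias) * (int)ve[i].stride ==
             (int)(ve[i].buffer_offset + ve[i].src_offset +
                   idx * ve[i].stride - ve[i].source_offset));

      printf("element %u: array offset %d, extra index bias %d\n",
             i, array_offset, -(int)neg_bias);
   }
   return 0;
}

With the sample values the common bias works out to 20 vertices, both compensated array offsets stay non-negative (0 and 16), and the final assert confirms that index 25 still fetches the same bytes it did before the partial upload.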