}
+void svga_hwtnl_set_index_bias( struct svga_hwtnl *hwtnl,
+ int index_bias)
+{
+ hwtnl->index_bias = index_bias;
+}
unsigned size = vb ? vb->width0 : 0;
unsigned offset = hwtnl->cmd.vdecl[i].array.offset;
unsigned stride = hwtnl->cmd.vdecl[i].array.stride;
- unsigned index_bias = range->indexBias;
+ int index_bias = (int) range->indexBias + hwtnl->index_bias;
unsigned width;
assert(vb);
assert(size);
assert(offset < size);
- assert(index_bias >= 0);
assert(min_index <= max_index);
- assert(offset + index_bias*stride < size);
+ if (index_bias >= 0) {
+ assert(offset + index_bias*stride < size);
+ }
if (min_index != ~0) {
assert(offset + (index_bias + min_index) * stride < size);
}
hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index;
hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range;
+ hwtnl->cmd.prim[hwtnl->cmd.prim_count].indexBias += hwtnl->index_bias;
pipe_resource_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib);
hwtnl->cmd.prim_count++;
enum pipe_error
svga_hwtnl_flush( struct svga_hwtnl *hwtnl );
+void svga_hwtnl_set_index_bias( struct svga_hwtnl *hwtnl,
+ int index_bias);
+
#endif /* SVGA_DRAW_H_ */
struct svga_hwtnl {
struct svga_context *svga;
struct u_upload_mgr *upload_ib;
+
+ /* Additional negative index bias due to partial buffer uploads.
+ * This is compensated for in the offset associated with all
+ * vertex buffers.
+ */
+
+ int index_bias;
/* Flatshade information:
*/
#include "svga_state.h"
#include "svga_swtnl.h"
#include "svga_debug.h"
+#include "svga_resource_buffer.h"
+#include "util/u_upload_mgr.h"
+
+/**
+ * svga_upload_user_buffers - upload parts of user buffers
+ *
+ * This function streams a part of each bound user buffer to the hardware
+ * and sets svga_buffer::source_offset to the offset of the first byte
+ * uploaded. After a successful upload, svga_buffer::uploaded::buffer is
+ * also non-NULL.
+ */
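+
+/*
+ * Sketch of the ranges computed below, with illustrative values only
+ * (not taken from this patch): for a per-vertex stride of 16 bytes and
+ * a draw with start = 10, count = 5, the streamed range is
+ * first = 16 * 10 = 160 and size = 16 * 5 = 80, so only those 80 bytes
+ * of the user buffer are copied to hardware.
+ */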
+
+static int
+svga_upload_user_buffers(struct svga_context *svga,
+ unsigned start,
+ unsigned count,
+ unsigned instance_count)
+{
+ const struct pipe_vertex_element *ve = svga->curr.velems->velem;
+ unsigned i;
+ int ret;
+
+ for (i=0; i < svga->curr.velems->count; i++) {
+ struct pipe_vertex_buffer *vb =
+ &svga->curr.vb[ve[i].vertex_buffer_index];
+
+ if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) {
+ struct svga_buffer *buffer = svga_buffer(vb->buffer);
+ unsigned first, size;
+ boolean flushed;
+ unsigned instance_div = ve[i].instance_divisor;
+
+ svga->dirty |= SVGA_NEW_VBUFFER;
+
+ if (instance_div) {
+ first = 0;
+ size = vb->stride *
+ (instance_count + instance_div - 1) / instance_div;
+ } else if (vb->stride) {
+ first = vb->stride * start;
+ size = vb->stride * count;
+ } else {
+ /* Only a single vertex!
+ * Upload with the largest vertex size the hw supports,
+ * if possible.
+ */
+ first = 0;
+ size = MIN2(16, vb->buffer->width0);
+ }
+
+ ret = u_upload_buffer( svga->upload_vb,
+ 0, first, size,
+ &buffer->b.b,
+ &buffer->uploaded.offset,
+ &buffer->uploaded.buffer,
+ &flushed);
+
+ if (ret)
+ return ret;
+
+ if (0)
+ debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sofs %d"
+ " sz %d\n",
+ __FUNCTION__,
+ i,
+ buffer,
+ buffer->uploaded.buffer,
+ buffer->uploaded.offset,
+ first,
+ size);
+
+ vb->buffer_offset = buffer->uploaded.offset;
+ buffer->source_offset = first;
+ }
+ }
+
+ return PIPE_OK;
+}
+
+/**
+ * svga_release_user_upl_buffers - release uploaded parts of user buffers
+ *
+ * This function releases the hardware copy of the uploaded fraction of
+ * each user buffer. It's important to do this as soon as all draw calls
+ * affecting the uploaded fraction have been issued, as this allows for
+ * efficient reuse of the hardware surface backing the uploaded fraction.
+ *
+ * svga_buffer::source_offset is reset to 0, and svga_buffer::uploaded::buffer
+ * is released (set to NULL).
+ */
+
+static void
+svga_release_user_upl_buffers(struct svga_context *svga)
+{
+ unsigned i;
+ unsigned nr;
+
+ nr = svga->curr.num_vertex_buffers;
+
+ for (i = 0; i < nr; ++i) {
+ struct pipe_vertex_buffer *vb = &svga->curr.vb[i];
+
+ if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) {
+ struct svga_buffer *buffer = svga_buffer(vb->buffer);
+
+ buffer->source_offset = 0;
+ if (buffer->uploaded.buffer)
+ pipe_resource_reference(&buffer->uploaded.buffer, NULL);
+ }
+ }
+}
unsigned prim,
unsigned start,
unsigned count,
+ unsigned instance_count,
boolean do_retry )
{
enum pipe_error ret = 0;
svga->curr.rast->templ.flatshade,
svga->curr.rast->templ.flatshade_first );
+ ret = svga_upload_user_buffers( svga, min_index + index_bias,
+ max_index - min_index + 1, instance_count );
+ if (ret != PIPE_OK)
+ goto retry;
ret = svga_update_state( svga, SVGA_STATE_HW_DRAW );
if (ret)
index_buffer, index_size, index_bias,
min_index, max_index,
prim, start, count,
- FALSE );
+ instance_count, FALSE );
}
return ret;
unsigned prim,
unsigned start,
unsigned count,
+ unsigned instance_count,
boolean do_retry )
{
enum pipe_error ret;
svga->curr.rast->templ.flatshade,
svga->curr.rast->templ.flatshade_first );
+ ret = svga_upload_user_buffers( svga, start, count, instance_count );
+
+ if (ret != PIPE_OK)
+ goto retry;
+
ret = svga_update_state( svga, SVGA_STATE_HW_DRAW );
if (ret)
goto retry;
prim,
start,
count,
+ instance_count,
FALSE );
}
svga_context_flush(svga, NULL);
}
+ /* Avoid leaking the previous hwtnl bias to swtnl */
+ svga_hwtnl_set_index_bias( svga->hwtnl, 0 );
ret = svga_swtnl_draw_vbo( svga, info );
}
else {
info->mode,
info->start + offset,
info->count,
+ info->instance_count,
TRUE );
}
else {
info->mode,
info->start,
info->count,
+ info->instance_count,
TRUE );
}
}
+ svga_release_user_upl_buffers( svga );
+
if (SVGA_DEBUG & DEBUG_FLUSH) {
svga_hwtnl_flush_retry( svga );
svga_context_flush(svga, NULL);
unsigned offset;
} uploaded;
+ /**
+ * For user buffers, this is the offset to the data about to be
+ * referenced by the next draw command, and hence the data that needs
+ * to be uploaded.
+ */
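+ /* When uploaded::buffer is non-NULL, pipe_vertex_buffer::buffer_offset
+ * has been redirected to uploaded::offset within the uploaded copy,
+ * while source_offset remembers where in the original user buffer the
+ * uploaded data started, so vertex declarations can compensate for the
+ * difference.
+ */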
+ unsigned source_offset;
+
/**
* DMA'ble memory.
*
#include "svga_hw_reg.h"
-static int
-upload_user_buffers( struct svga_context *svga )
-{
- enum pipe_error ret = PIPE_OK;
- int i;
- int nr;
-
- if (0)
- debug_printf("%s: %d\n", __FUNCTION__, svga->curr.num_vertex_buffers);
-
- nr = svga->curr.num_vertex_buffers;
-
- for (i = 0; i < nr; i++)
- {
- if (svga_buffer_is_user_buffer(svga->curr.vb[i].buffer))
- {
- struct svga_buffer *buffer = svga_buffer(svga->curr.vb[i].buffer);
-
- if (!buffer->uploaded.buffer) {
- boolean flushed;
- ret = u_upload_buffer( svga->upload_vb,
- 0, 0,
- buffer->b.b.width0,
- &buffer->b.b,
- &buffer->uploaded.offset,
- &buffer->uploaded.buffer,
- &flushed);
- if (ret)
- return ret;
-
- if (0)
- debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sz %d\n",
- __FUNCTION__,
- i,
- buffer,
- buffer->uploaded.buffer,
- buffer->uploaded.offset,
- buffer->b.b.width0);
- }
-
- svga->curr.vb[i].buffer_offset = buffer->uploaded.offset;
- }
- }
-
- if (0)
- debug_printf("%s: DONE\n", __FUNCTION__);
-
- return ret;
-}
-
-
/***********************************************************************
*/
const struct pipe_vertex_element *ve = svga->curr.velems->velem;
SVGA3dVertexDecl decl;
unsigned i;
+ unsigned neg_bias = 0;
assert(svga->curr.velems->count >=
svga->curr.vs->base.info.file_count[TGSI_FILE_INPUT]);
svga_hwtnl_reset_vdecl( svga->hwtnl,
svga->curr.velems->count );
+ /**
+ * We can't set the VDECL offset to something negative, so we
+ * must calculate a common negative additional index bias, and modify
+ * the VDECL offsets accordingly so they *all* end up positive.
+ *
+ * Note that the exact value of the negative index bias is not that
+ * important, since we compensate for it when we calculate the vertex
+ * buffer offset below. The important thing is that all vertex buffer
+ * offsets remain positive.
+ *
+ * Note that we use a negative bias variable in order to make the
+ * rounding maths easier to follow, and to avoid int / unsigned
+ * confusion.
+ */
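+
+ /* Worked example with hypothetical numbers, for illustration only:
+ * if a user buffer was uploaded starting at source_offset = 100 and an
+ * element reads it at buffer_offset + src_offset = 40 with stride = 16,
+ * then tmp_neg_bias = (100 - 40 + 15) / 16 = 4. With neg_bias = 4 the
+ * VDECL offset below becomes 40 + 4 * 16 - 100 = 4, which is no longer
+ * negative, and svga_hwtnl_set_index_bias( hwtnl, -4 ) compensates for
+ * the four-vertex shift.
+ */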
+
for (i = 0; i < svga->curr.velems->count; i++) {
- const struct pipe_vertex_buffer *vb = &svga->curr.vb[ve[i].vertex_buffer_index];
+ const struct pipe_vertex_buffer *vb =
+ &svga->curr.vb[ve[i].vertex_buffer_index];
+ struct svga_buffer *buffer;
+ unsigned int offset = vb->buffer_offset + ve[i].src_offset;
+ unsigned tmp_neg_bias = 0;
+
+ if (!vb->buffer)
+ continue;
+
+ buffer = svga_buffer(vb->buffer);
+ if (buffer->source_offset > offset) {
+ tmp_neg_bias = buffer->source_offset - offset;
+ if (vb->stride)
+ tmp_neg_bias = (tmp_neg_bias + vb->stride - 1) / vb->stride;
+ neg_bias = MAX2(neg_bias, tmp_neg_bias);
+ }
+ }
+
+ for (i = 0; i < svga->curr.velems->count; i++) {
+ const struct pipe_vertex_buffer *vb =
+ &svga->curr.vb[ve[i].vertex_buffer_index];
unsigned usage, index;
- struct svga_buffer *buffer = svga_buffer(vb->buffer);
+ struct svga_buffer *buffer;
+ if (!vb->buffer)
+ continue;
+ buffer = svga_buffer(vb->buffer);
svga_generate_vdecl_semantics( i, &usage, &index );
/* SVGA_NEW_VELEMENT
decl.identity.usage = usage;
decl.identity.usageIndex = index;
decl.array.stride = vb->stride;
- decl.array.offset = (vb->buffer_offset +
- ve[i].src_offset);
+
+ /* Compensate for partially uploaded vbo, and
+ * for the negative index bias.
+ */
+ decl.array.offset = (vb->buffer_offset
+ + ve[i].src_offset
+ + neg_bias * vb->stride
+ - buffer->source_offset);
+
+ assert(vb->buffer_offset + ve[i].src_offset
+ + neg_bias * vb->stride >= buffer->source_offset);
svga_hwtnl_vdecl( svga->hwtnl,
i,
vb->buffer );
}
+ svga_hwtnl_set_index_bias( svga->hwtnl, -(int) neg_bias );
return 0;
}
static int emit_hw_vdecl( struct svga_context *svga,
unsigned dirty )
{
- int ret = 0;
-
/* SVGA_NEW_NEED_SWTNL
*/
if (svga->state.sw.need_swtnl)
return 0; /* Do not emit during swtnl */
- /* If we get to here, we know that we're going to draw. Upload
- * userbuffers now and try to combine multiple userbuffers from
- * multiple draw calls into a single host buffer for performance.
- */
- if (svga->curr.any_user_vertex_buffers) {
- ret = upload_user_buffers( svga );
- if (ret)
- return ret;
- }
-
return emit_hw_vs_vdecl( svga, dirty );
}