virgl: Use buffer copy transfers to avoid waiting when mapping
authorAlexandros Frantzis <alexandros.frantzis@collabora.com>
Wed, 8 May 2019 09:10:21 +0000 (12:10 +0300)
committerChia-I Wu <olvaffe@gmail.com>
Sat, 8 Jun 2019 04:45:39 +0000 (21:45 -0700)
We typically need to wait for a buffer to become ready before mapping,
so that we don't write new contents while the host is still using the
old contents. However, if we are allowed to discard the contents of the
mapped buffer range, then we can avoid waiting by using a staging buffer
range which we guarantee to never be busy, copying from the staging
buffer range to the target buffer in the host.

This commit implements this optimization by utilizing a dedicated
u_upload_mgr for the staging buffer.

Performance results:
Twilight Struggle (Steam/Proton), qemu before: 7 FPS after: 25 FPS
glmark2 ubo, qemu before: 38 FPS after: 331 FPS

Signed-off-by: Alexandros Frantzis <alexandros.frantzis@collabora.com>
Suggested-by: Gurchetan Singh <gurchetansingh@chromium.org>
Reviewed-by: Chia-I Wu <olvaffe@gmail.com>
src/gallium/drivers/virgl/virgl_buffer.c
src/gallium/drivers/virgl/virgl_context.c
src/gallium/drivers/virgl/virgl_context.h
src/gallium/drivers/virgl/virgl_resource.c
src/gallium/drivers/virgl/virgl_resource.h
src/gallium/drivers/virgl/virgl_transfer_queue.c

index 284eeca4d8f89148f60045c8c222285699d1d355..882fae2f8ba857cc27bfdfb3c6e7280961b1a941 100644 (file)
@@ -24,6 +24,7 @@
 #include "util/u_inlines.h"
 #include "util/u_memory.h"
 #include "virgl_context.h"
+#include "virgl_encode.h"
 #include "virgl_resource.h"
 #include "virgl_screen.h"
 
@@ -39,6 +40,7 @@ static void *virgl_buffer_transfer_map(struct pipe_context *ctx,
    struct virgl_resource *vbuf = virgl_resource(resource);
    struct virgl_transfer *trans;
    enum virgl_transfer_map_type map_type;
+   void *map_addr;
 
    trans = virgl_resource_create_transfer(&vctx->transfer_pool, resource,
                                           &vbuf->metadata, level, usage, box);
@@ -47,14 +49,24 @@ static void *virgl_buffer_transfer_map(struct pipe_context *ctx,
    switch (map_type) {
    case VIRGL_TRANSFER_MAP_HW_RES:
       trans->hw_res_map = vs->vws->resource_map(vs->vws, vbuf->hw_res);
+      if (trans->hw_res_map)
+         map_addr = trans->hw_res_map + trans->offset;
+      else
+         map_addr = NULL;
+      break;
+   case VIRGL_TRANSFER_MAP_STAGING:
+      map_addr = virgl_transfer_uploader_map(vctx, trans);
+      /* Copy transfers don't make use of hw_res_map at the moment. */
+      trans->hw_res_map = NULL;
       break;
    case VIRGL_TRANSFER_MAP_ERROR:
    default:
       trans->hw_res_map = NULL;
+      map_addr = NULL;
       break;
    }
 
-   if (!trans->hw_res_map) {
+   if (!map_addr) {
       virgl_resource_destroy_transfer(&vctx->transfer_pool, trans);
       return NULL;
    }
@@ -63,7 +75,7 @@ static void *virgl_buffer_transfer_map(struct pipe_context *ctx,
        util_range_add(&vbuf->valid_buffer_range, box->x, box->x + box->width);
 
    *transfer = &trans->base;
-   return trans->hw_res_map + trans->offset;
+   return map_addr;
 }
 
 static void virgl_buffer_transfer_unmap(struct pipe_context *ctx,
@@ -71,6 +83,15 @@ static void virgl_buffer_transfer_unmap(struct pipe_context *ctx,
 {
    struct virgl_context *vctx = virgl_context(ctx);
    struct virgl_transfer *trans = virgl_transfer(transfer);
+   struct virgl_screen *vs = virgl_screen(ctx->screen);
+   struct pipe_resource *res = transfer->resource;
+
+   /* We don't need to transfer the contents of staging buffers, since they
+    * don't have any host-side storage. */
+   if (pipe_to_virgl_bind(vs, res->bind, res->flags) == VIRGL_BIND_STAGING) {
+      virgl_resource_destroy_transfer(&vctx->transfer_pool, trans);
+      return;
+   }
 
    if (trans->base.usage & PIPE_TRANSFER_WRITE) {
       if (transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT) {
@@ -84,7 +105,14 @@ static void virgl_buffer_transfer_unmap(struct pipe_context *ctx,
          trans->offset = transfer->box.x;
       }
 
-      virgl_transfer_queue_unmap(&vctx->queue, trans);
+      if (trans->copy_src_res) {
+         virgl_encode_copy_transfer(vctx, trans);
+         /* It's now safe for other mappings to use the transfer_uploader. */
+         vctx->transfer_uploader_in_use = false;
+         virgl_resource_destroy_transfer(&vctx->transfer_pool, trans);
+      } else {
+         virgl_transfer_queue_unmap(&vctx->queue, trans);
+      }
    } else
       virgl_resource_destroy_transfer(&vctx->transfer_pool, trans);
 }
index 507160a5580c83462cc7ae69a52ae04d1e0e2e63..9964520d5e7dd991302eb3bafb969f0f98d3575a 100644 (file)
@@ -1220,6 +1220,8 @@ virgl_context_destroy( struct pipe_context *ctx )
    rs->vws->cmd_buf_destroy(vctx->cbuf);
    if (vctx->uploader)
       u_upload_destroy(vctx->uploader);
+   if (vctx->transfer_uploader)
+      u_upload_destroy(vctx->transfer_uploader);
    util_primconvert_destroy(vctx->primconvert);
    virgl_transfer_queue_fini(&vctx->queue);
 
@@ -1380,6 +1382,18 @@ struct pipe_context *virgl_context_create(struct pipe_screen *pscreen,
            goto fail;
    vctx->base.stream_uploader = vctx->uploader;
    vctx->base.const_uploader = vctx->uploader;
+   /* Use a custom/staging buffer for the transfer uploader, since we are
+    * using it only for copies to other resources.
+    */
+   if ((rs->caps.caps.v2.capability_bits & VIRGL_CAP_COPY_TRANSFER) &&
+       vctx->encoded_transfers) {
+      vctx->transfer_uploader = u_upload_create(&vctx->base, 1024 * 1024,
+                                                PIPE_BIND_CUSTOM,
+                                                PIPE_USAGE_STAGING,
+                                                VIRGL_RESOURCE_FLAG_STAGING);
+      if (!vctx->transfer_uploader)
+              goto fail;
+   }
 
    vctx->hw_sub_ctx_id = rs->sub_ctx_id++;
    virgl_encoder_create_sub_ctx(vctx, vctx->hw_sub_ctx_id);
@@ -1394,5 +1408,6 @@ struct pipe_context *virgl_context_create(struct pipe_screen *pscreen,
 
    return &vctx->base;
 fail:
+   virgl_context_destroy(&vctx->base);
    return NULL;
 }
index 7fd0740a9a043e0979941cde21e3ef26df6666f6..1449c64189ac413f5a475ca6ae250934a2c2a9e6 100644 (file)
@@ -81,6 +81,8 @@ struct virgl_context {
    struct slab_child_pool transfer_pool;
    struct virgl_transfer_queue queue;
    struct u_upload_mgr *uploader;
+   struct u_upload_mgr *transfer_uploader;
+   bool transfer_uploader_in_use;
    bool encoded_transfers;
 
    struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
index fd01df1c10a5f2b879dbf92eb113245d908a7cc5..d9d79b9c0df6a74ee38532b4f7f49313bd5708c7 100644 (file)
@@ -23,6 +23,7 @@
 #include "util/u_format.h"
 #include "util/u_inlines.h"
 #include "util/u_memory.h"
+#include "util/u_upload_mgr.h"
 #include "virgl_context.h"
 #include "virgl_resource.h"
 #include "virgl_screen.h"
@@ -76,12 +77,17 @@ enum virgl_transfer_map_type
 virgl_resource_transfer_prepare(struct virgl_context *vctx,
                                 struct virgl_transfer *xfer)
 {
-   struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
+   struct virgl_screen *vs = virgl_screen(vctx->base.screen);
+   struct virgl_winsys *vws = vs->vws;
    struct virgl_resource *res = virgl_resource(xfer->base.resource);
    enum virgl_transfer_map_type map_type = VIRGL_TRANSFER_MAP_HW_RES;
+   bool unsynchronized = xfer->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED;
+   bool discard = xfer->base.usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
+                                      PIPE_TRANSFER_DISCARD_RANGE);
    bool flush;
    bool readback;
    bool wait;
+   bool copy_transfer;
 
    /* there is no way to map the host storage currently */
    if (xfer->base.usage & PIPE_TRANSFER_MAP_DIRECTLY)
@@ -98,10 +104,20 @@ virgl_resource_transfer_prepare(struct virgl_context *vctx,
    flush = virgl_res_needs_flush(vctx, xfer);
    readback = virgl_res_needs_readback(vctx, res, xfer->base.usage,
                                        xfer->base.level);
+
+   /* Check if we should perform a copy transfer through the transfer_uploader. */
+   copy_transfer = res->u.b.target == PIPE_BUFFER &&
+                   discard &&
+                   !readback &&
+                   !unsynchronized &&
+                   vctx->transfer_uploader &&
+                   !vctx->transfer_uploader_in_use &&
+                   (flush || vws->resource_is_busy(vws, res->hw_res));
+
    /* We need to wait for all cmdbufs, current or previous, that access the
     * resource to finish unless synchronization is disabled.
     */
-   wait = !(xfer->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED);
+   wait = !unsynchronized;
 
    /* When the transfer range consists of only uninitialized data, we can
     * assume the GPU is not accessing the range and readback is unnecessary.
@@ -114,6 +130,15 @@ virgl_resource_transfer_prepare(struct virgl_context *vctx,
       flush = false;
       readback = false;
       wait = false;
+      copy_transfer = false;
+   }
+
+   /* When performing a copy transfer there is no need to flush or wait for
+    * the target resource.
+    */
+   if (copy_transfer) {
+      flush = false;
+      wait = false;
    }
 
    /* readback has some implications */
@@ -166,6 +191,9 @@ virgl_resource_transfer_prepare(struct virgl_context *vctx,
    if (wait)
       vws->resource_wait(vws, res->hw_res);
 
+   if (copy_transfer)
+      map_type = VIRGL_TRANSFER_MAP_STAGING;
+
    return map_type;
 }
 
@@ -461,3 +489,55 @@ void virgl_resource_dirty(struct virgl_resource *res, uint32_t level)
          res->clean_mask &= ~(1 << level);
    }
 }
+
+void *virgl_transfer_uploader_map(struct virgl_context *vctx,
+                                  struct virgl_transfer *vtransfer)
+{
+   struct virgl_resource *vres = virgl_resource(vtransfer->base.resource);
+   unsigned size;
+   unsigned align_offset;
+   void *map_addr;
+
+   assert(vctx->transfer_uploader);
+   assert(!vctx->transfer_uploader_in_use);
+
+   size = vtransfer->base.box.width;
+
+   /* For buffers we need to ensure that the start of the buffer would be
+    * aligned to VIRGL_MAP_BUFFER_ALIGNMENT, even if our transfer doesn't
+    * actually include it. To achieve this we may need to allocate a slightly
+    * larger range from the upload buffer, and later update the uploader
+    * resource offset and map address to point to the requested x coordinate
+    * within that range.
+    *
+    * 0       A       2A      3A
+    * |-------|---bbbb|bbbbb--|
+    *             |--------|    ==> size
+    *         |---|             ==> align_offset
+    *         |------------|    ==> allocation of size + align_offset
+    */
+   align_offset = vtransfer->base.box.x % VIRGL_MAP_BUFFER_ALIGNMENT;
+
+   u_upload_alloc(vctx->transfer_uploader, 0, size + align_offset,
+                  VIRGL_MAP_BUFFER_ALIGNMENT,
+                  &vtransfer->copy_src_offset,
+                  &vtransfer->copy_src_res, &map_addr);
+   if (map_addr) {
+      /* Update source offset and address to point to the requested x coordinate
+       * if we have an align_offset (see above for more information). */
+      vtransfer->copy_src_offset += align_offset;
+      map_addr += align_offset;
+
+      /* Mark as dirty, since we are updating the host side resource
+       * without going through the corresponding guest side resource, and
+       * hence the two will diverge.
+       */
+      virgl_resource_dirty(vres, vtransfer->base.level);
+
+      /* The pointer returned by u_upload_alloc already has +offset
+       * applied. */
+      vctx->transfer_uploader_in_use = true;
+   }
+
+   return map_addr;
+}
index 358ce3d926f1648aeec7557b71fc73b6fe95bacf..2b9de1b4be476038e5a199fb753263b7e926fc26 100644 (file)
@@ -64,6 +64,10 @@ struct virgl_resource {
 enum virgl_transfer_map_type {
    VIRGL_TRANSFER_MAP_ERROR = -1,
    VIRGL_TRANSFER_MAP_HW_RES,
+   /* Map a range of a staging buffer. The updated contents should be transferred
+    * with a copy transfer.
+    */
+   VIRGL_TRANSFER_MAP_STAGING,
 };
 
 struct virgl_transfer {
@@ -169,4 +173,7 @@ boolean virgl_resource_get_handle(struct pipe_screen *screen,
 
 void virgl_resource_dirty(struct virgl_resource *res, uint32_t level);
 
+void *virgl_transfer_uploader_map(struct virgl_context *vctx,
+                                  struct virgl_transfer *vtransfer);
+
 #endif
index 6144dfe486174ec5a76ab4226f37a7495190ab21..f3785e997666d052c8897cf6ed3bc7db2cef8e1f 100644 (file)
@@ -223,7 +223,6 @@ static void add_internal(struct virgl_transfer_queue *queue,
                          struct virgl_transfer *transfer)
 {
    uint32_t dwords = VIRGL_TRANSFER3D_SIZE + 1;
-
    if (queue->tbuf) {
       if (queue->num_dwords + dwords >= VIRGL_MAX_TBUF_DWORDS) {
          struct list_iteration_args iter;