virgl: introduce transfer queue
authorGurchetan Singh <gurchetansingh@chromium.org>
Fri, 28 Dec 2018 19:19:03 +0000 (11:19 -0800)
committerGert Wollny <gert.wollny@collabora.com>
Fri, 15 Feb 2019 10:19:05 +0000 (11:19 +0100)
Transfers will be placed here at unmap time instead of incurring
a VM exit. There's an attempt to deduplicate intersecting 1D transfers,
which are surprisingly common.

This can also help with mipmapped texture upload and smaller
textures, where the majority of the time is spent in the guest
kernel / QEMU -- not virglrenderer.  This is shown by the GLbench
texture upload benchmark:

Before:
    texture_upload_rgba_teximage2d_32 = 64.23 mtexel_sec
After:
    texture_upload_rgba_teximage2d_32 = 367.44 mtexel_sec

v2: Split up list iteration functions (@gerddie)
v3: Support for optimizing glBufferSubData
Reviewed-by: Gert Wollny <gert.wollny@collabora.com>
src/gallium/drivers/virgl/Makefile.sources
src/gallium/drivers/virgl/meson.build
src/gallium/drivers/virgl/virgl_resource.h
src/gallium/drivers/virgl/virgl_transfer_queue.c [new file with mode: 0644]
src/gallium/drivers/virgl/virgl_transfer_queue.h [new file with mode: 0644]

index c27d284e2485de8fcf7f34bf95ea7cc1dc607306..a5d97657eacf92c7336950c271bdec221a86d7ad 100644 (file)
@@ -15,4 +15,6 @@ C_SOURCES := \
        virgl_streamout.c \
        virgl_texture.c \
        virgl_tgsi.c \
+       virgl_transfer_queue.c \
+       virgl_transfer_queue.h \
        virgl_winsys.h
index 16d185c57232aa94259aad0f6164531ff90daf0a..289203d0459bf8a7189ac4f09a585bb8e6464b74 100644 (file)
@@ -26,6 +26,7 @@ files_libvirgl = files(
   'virgl_resource.c',
   'virgl_screen.c',
   'virgl_streamout.c',
+  'virgl_transfer_queue.c',
   'virgl_texture.c',
   'virgl_tgsi.c',
 )
index acc046ac442eb9203202a51e15a3735b5272602e..01f6bd7ce7d2c635168d7eb6dc1d3770676ee51f 100644 (file)
@@ -55,6 +55,7 @@ struct virgl_transfer {
    struct pipe_transfer base;
    uint32_t offset, l_stride;
    struct util_range range;
+   struct list_head queue_link;
    struct virgl_resource *resolve_tmp;
 };
 
diff --git a/src/gallium/drivers/virgl/virgl_transfer_queue.c b/src/gallium/drivers/virgl/virgl_transfer_queue.c
new file mode 100644 (file)
index 0000000..25267d4
--- /dev/null
@@ -0,0 +1,333 @@
+/*
+ * Copyright 2018 Chromium.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "util/u_box.h"
+#include "util/u_inlines.h"
+
+#include "virgl_protocol.h"
+#include "virgl_screen.h"
+#include "virgl_encode.h"
+#include "virgl_transfer_queue.h"
+
+struct list_action_args
+{
+   void *data;
+   struct virgl_transfer *queued;
+   struct virgl_transfer *current;
+};
+
+typedef bool (*compare_transfers_t)(struct virgl_transfer *queued,
+                                    struct virgl_transfer *current);
+
+typedef void (*list_action_t)(struct virgl_transfer_queue *queue,
+                              struct list_action_args *args);
+
+struct list_iteration_args
+{
+   void *data;
+   list_action_t action;
+   compare_transfers_t compare;
+   struct virgl_transfer *current;
+   enum virgl_transfer_queue_lists type;
+};
+
+static bool transfers_intersect(struct virgl_transfer *queued,
+                                struct virgl_transfer *current)
+{
+   boolean tmp;
+   struct pipe_resource *queued_res = queued->base.resource;
+   struct pipe_resource *current_res = current->base.resource;
+
+   if (queued_res != current_res)
+      return false;
+
+   tmp = u_box_test_intersection_2d(&queued->base.box, &current->base.box);
+   return (tmp == TRUE);
+}
+
+static bool transfers_overlap(struct virgl_transfer *queued,
+                              struct virgl_transfer *current)
+{
+   boolean tmp;
+   struct pipe_resource *queued_res = queued->base.resource;
+   struct pipe_resource *current_res = current->base.resource;
+
+   if (queued_res != current_res)
+      return false;
+
+   if (queued->base.level != current->base.level)
+      return false;
+
+   if (queued->base.box.z != current->base.box.z)
+      return true;
+
+   if (queued->base.box.depth != 1 || current->base.box.depth != 1)
+      return true;
+
+   /*
+    * Special case for boxes with [x: 0, width: 1] and [x: 1, width: 1].
+    */
+   if (queued_res->target == PIPE_BUFFER) {
+      if (queued->base.box.x + queued->base.box.width == current->base.box.x)
+         return false;
+
+      if (current->base.box.x + current->base.box.width == queued->base.box.x)
+         return false;
+   }
+
+   tmp = u_box_test_intersection_2d(&queued->base.box, &current->base.box);
+   return (tmp == TRUE);
+}
+
+static void set_true(UNUSED struct virgl_transfer_queue *queue,
+                     struct list_action_args *args)
+{
+   bool *val = args->data;
+   *val = true;
+}
+
+static void remove_transfer(struct virgl_transfer_queue *queue,
+                            struct list_action_args *args)
+{
+   struct virgl_transfer *queued = args->queued;
+   struct pipe_resource *pres = queued->base.resource;
+   list_del(&queued->queue_link);
+   pipe_resource_reference(&pres, NULL);
+   virgl_resource_destroy_transfer(queue->pool, queued);
+}
+
+static void replace_unmapped_transfer(struct virgl_transfer_queue *queue,
+                                      struct list_action_args *args)
+{
+   struct virgl_transfer *current = args->current;
+   struct virgl_transfer *queued = args->queued;
+
+   u_box_union_2d(&current->base.box, &current->base.box, &queued->base.box);
+   current->offset = current->base.box.x;
+
+   remove_transfer(queue, args);
+   queue->num_dwords -= (VIRGL_TRANSFER3D_SIZE + 1);
+}
+
+static void transfer_put(struct virgl_transfer_queue *queue,
+                         struct list_action_args *args)
+{
+   struct virgl_transfer *queued = args->queued;
+   struct virgl_resource *res = virgl_resource(queued->base.resource);
+
+   queue->vs->vws->transfer_put(queue->vs->vws, res->hw_res, &queued->base.box,
+                                queued->base.stride, queued->l_stride,
+                                queued->offset, queued->base.level);
+
+   remove_transfer(queue, args);
+}
+
+static void transfer_write(struct virgl_transfer_queue *queue,
+                           struct list_action_args *args)
+{
+   struct virgl_transfer *queued = args->queued;
+   struct virgl_cmd_buf *buf = args->data;
+
+   // Takes a reference on the HW resource, which is released after
+   // the exec buffer command.
+   virgl_encode_transfer(queue->vs, buf, queued, VIRGL_TRANSFER_TO_HOST);
+
+   list_delinit(&queued->queue_link);
+   list_addtail(&queued->queue_link, &queue->lists[COMPLETED_LIST]);
+}
+
+static void compare_and_perform_action(struct virgl_transfer_queue *queue,
+                                       struct list_iteration_args *iter)
+{
+   struct list_action_args args;
+   struct virgl_transfer *queued, *tmp;
+   enum virgl_transfer_queue_lists type = iter->type;
+
+   memset(&args, 0, sizeof(args));
+   args.current = iter->current;
+   args.data = iter->data;
+
+   LIST_FOR_EACH_ENTRY_SAFE(queued, tmp, &queue->lists[type], queue_link) {
+      if (iter->compare(queued, iter->current)) {
+         args.queued = queued;
+         iter->action(queue, &args);
+      }
+   }
+}
+
+static void perform_action(struct virgl_transfer_queue *queue,
+                           struct list_iteration_args *iter)
+{
+   struct list_action_args args;
+   struct virgl_transfer *queued, *tmp;
+   enum virgl_transfer_queue_lists type = iter->type;
+
+   memset(&args, 0, sizeof(args));
+   args.data = iter->data;
+
+   LIST_FOR_EACH_ENTRY_SAFE(queued, tmp, &queue->lists[type], queue_link) {
+      args.queued = queued;
+      iter->action(queue, &args);
+   }
+}
+
+static void add_internal(struct virgl_transfer_queue *queue,
+                         struct virgl_transfer *transfer)
+{
+   uint32_t dwords = VIRGL_TRANSFER3D_SIZE + 1;
+   if (queue->tbuf) {
+      if (queue->num_dwords + dwords >= VIRGL_MAX_TBUF_DWORDS) {
+         struct list_iteration_args iter;
+         struct virgl_winsys *vws = queue->vs->vws;
+
+         memset(&iter, 0, sizeof(iter));
+         iter.type = PENDING_LIST;
+         iter.action = transfer_write;
+         iter.data = queue->tbuf;
+         perform_action(queue, &iter);
+
+         vws->submit_cmd(vws, queue->tbuf, -1, NULL);
+         queue->num_dwords = 0;
+      }
+   }
+
+   list_addtail(&transfer->queue_link, &queue->lists[PENDING_LIST]);
+   queue->num_dwords += dwords;
+}
+
+
+void virgl_transfer_queue_init(struct virgl_transfer_queue *queue,
+                               struct virgl_screen *vs,
+                               struct slab_child_pool *pool)
+{
+   queue->vs = vs;
+   queue->pool = pool;
+   queue->num_dwords = 0;
+
+   for (uint32_t i = 0; i < MAX_LISTS; i++)
+      list_inithead(&queue->lists[i]);
+
+   if ((vs->caps.caps.v2.capability_bits & VIRGL_CAP_TRANSFER) &&
+        vs->vws->supports_encoded_transfers)
+      queue->tbuf = vs->vws->cmd_buf_create(vs->vws, VIRGL_MAX_TBUF_DWORDS);
+   else
+      queue->tbuf = NULL;
+}
+
+void virgl_transfer_queue_fini(struct virgl_transfer_queue *queue)
+{
+   struct virgl_winsys *vws = queue->vs->vws;
+   struct list_iteration_args iter;
+
+   memset(&iter, 0, sizeof(iter));
+
+   iter.action = transfer_put;
+   iter.type = PENDING_LIST;
+   perform_action(queue, &iter);
+
+   iter.action = remove_transfer;
+   iter.type = COMPLETED_LIST;
+   perform_action(queue, &iter);
+
+   if (queue->tbuf)
+      vws->cmd_buf_destroy(queue->tbuf);
+
+   queue->vs = NULL;
+   queue->pool = NULL;
+   queue->tbuf = NULL;
+   queue->num_dwords = 0;
+}
+
+int virgl_transfer_queue_unmap(struct virgl_transfer_queue *queue,
+                               struct virgl_transfer *transfer)
+{
+   struct pipe_resource *res, *pres;
+   struct list_iteration_args iter;
+
+   pres = NULL;
+   res = transfer->base.resource;
+   pipe_resource_reference(&pres, res);
+
+   if (res->target == PIPE_BUFFER) {
+      memset(&iter, 0, sizeof(iter));
+      iter.current = transfer;
+      iter.compare = transfers_intersect;
+      iter.action = replace_unmapped_transfer;
+      iter.type = PENDING_LIST;
+      compare_and_perform_action(queue, &iter);
+   }
+
+   add_internal(queue, transfer);
+   return 0;
+}
+
+int virgl_transfer_queue_clear(struct virgl_transfer_queue *queue,
+                               struct virgl_cmd_buf *cbuf)
+{
+   struct list_iteration_args iter;
+
+   memset(&iter, 0, sizeof(iter));
+   iter.type = PENDING_LIST;
+   if (queue->tbuf) {
+      uint32_t prior_num_dwords = cbuf->cdw;
+      cbuf->cdw = 0;
+
+      iter.action = transfer_write;
+      iter.data = cbuf;
+      perform_action(queue, &iter);
+
+      virgl_encode_end_transfers(cbuf);
+      cbuf->cdw = prior_num_dwords;
+   } else {
+      iter.action = transfer_put;
+      perform_action(queue, &iter);
+   }
+
+   iter.action = remove_transfer;
+   iter.type = COMPLETED_LIST;
+   perform_action(queue, &iter);
+   queue->num_dwords = 0;
+
+   return 0;
+}
+
+bool virgl_transfer_queue_is_queued(struct virgl_transfer_queue *queue,
+                                    struct virgl_transfer *transfer)
+{
+   bool queued = false;
+   struct list_iteration_args iter;
+
+   memset(&iter, 0, sizeof(iter));
+   iter.current = transfer;
+   iter.compare = transfers_overlap;
+   iter.action = set_true;
+   iter.data = &queued;
+
+   iter.type = PENDING_LIST;
+   compare_and_perform_action(queue, &iter);
+
+   iter.type = COMPLETED_LIST;
+   compare_and_perform_action(queue, &iter);
+
+   return queued;
+}
diff --git a/src/gallium/drivers/virgl/virgl_transfer_queue.h b/src/gallium/drivers/virgl/virgl_transfer_queue.h
new file mode 100644 (file)
index 0000000..3aebae0
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2018 Chromium.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "virgl_resource.h"
+
+enum virgl_transfer_queue_lists {
+   PENDING_LIST = 0,
+   COMPLETED_LIST = 1,
+   MAX_LISTS = 2,
+};
+
+struct virgl_transfer_queue {
+   struct list_head lists[MAX_LISTS];
+   struct virgl_screen *vs;
+   struct slab_child_pool *pool;
+   struct virgl_cmd_buf *tbuf;
+   uint32_t num_dwords;
+};
+
+void virgl_transfer_queue_init(struct virgl_transfer_queue *queue,
+                               struct virgl_screen *vs,
+                               struct slab_child_pool *pool);
+
+void virgl_transfer_queue_fini(struct virgl_transfer_queue *queue);
+
+int virgl_transfer_queue_unmap(struct virgl_transfer_queue *queue,
+                               struct virgl_transfer *transfer);
+
+int virgl_transfer_queue_clear(struct virgl_transfer_queue *queue,
+                               struct virgl_cmd_buf *buf);
+
+bool virgl_transfer_queue_is_queued(struct virgl_transfer_queue *queue,
+                                    struct virgl_transfer *transfer);