anv: Use submit-time implicit sync instead of allocate-time
author: Jason Ekstrand <jason@jlekstrand.net>
Thu, 21 Nov 2019 12:10:32 +0000 (06:10 -0600)
committer: Jason Ekstrand <jason@jlekstrand.net>
Fri, 6 Dec 2019 19:58:07 +0000 (19:58 +0000)
In 83b943cc2f24, we started making all VkDeviceMemory BOs resident all
the time.  One unfortunate side-effect of this is that every
vkQueueSubmit sets EXEC_OBJECT_WRITE on every WSI memory object, which
means that the X server or Wayland compositor, instead of waiting on the
last vkQueueSubmit to actually write the buffer, now waits on the last
vkQueueSubmit from that driver instance relative to whenever the
compositor's GL driver instance calls execbuf.  This potentially leads
to a lot of extra synchronization that we didn't intend to have.

Instead, this commit makes it so that we leave WSI memory objects with
EXEC_OBJECT_ASYNC most of the time and only unset EXEC_OBJECT_ASYNC and
set EXEC_OBJECT_WRITE in the dummy execbuf that we do as part of
vkQueuePresent.  This should hopefully result in tighter integration
with the compositor, lower latency, and better performance.

Testing with DOOM 2016, this seems to reduce latency by at least a frame
if not two and makes the game much more responsive.  Testing was,
however, subjective, so we don't have any hard data on that.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
src/intel/vulkan/anv_device.c
src/intel/vulkan/anv_queue.c

index 9db62ef919ecdfcd1fc19053e28bd1711da1cc42..027a908f8e7d36d9d3adfb778e3063e6673a3e51 100644 (file)
@@ -3139,19 +3139,6 @@ VkResult anv_AllocateMemory(
          break;
       }
 
-      case VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA: {
-         const struct wsi_memory_allocate_info *wsi_info = (void *)ext;
-         if (wsi_info->implicit_sync) {
-            /* We need to set the WRITE flag on window system buffers so that
-             * GEM will know we're writing to them and synchronize uses on
-             * other rings (eg if the display server uses the blitter ring).
-             */
-            alloc_flags |= ANV_BO_ALLOC_IMPLICIT_SYNC |
-                           ANV_BO_ALLOC_IMPLICIT_WRITE;
-         }
-         break;
-      }
-
       default:
          anv_debug_ignored_stype(ext->sType);
          break;
index 791975d1fcf2011a30876af35823ca008a977e80..abcedfd95d185631ff27787cb558efc551cad4f9 100644 (file)
@@ -702,6 +702,7 @@ anv_queue_submit(struct anv_queue *queue,
                  const VkSemaphore *out_semaphores,
                  const uint64_t *out_values,
                  uint32_t num_out_semaphores,
+                 struct anv_bo *wsi_signal_bo,
                  VkFence _fence)
 {
    ANV_FROM_HANDLE(anv_fence, fence, _fence);
@@ -829,6 +830,12 @@ anv_queue_submit(struct anv_queue *queue,
       }
    }
 
+   if (wsi_signal_bo) {
+      result = anv_queue_submit_add_fence_bo(submit, wsi_signal_bo, true /* signal */);
+      if (result != VK_SUCCESS)
+         goto error;
+   }
+
    if (fence) {
       /* Under most circumstances, out fences won't be temporary.  However,
        * the spec does allow it for opaque_fd.  From the Vulkan 1.0.53 spec:
@@ -923,7 +930,8 @@ VkResult anv_QueueSubmit(
        * come up with something more efficient but this shouldn't be a
        * common case.
        */
-      result = anv_queue_submit(queue, NULL, NULL, NULL, 0, NULL, NULL, 0, fence);
+      result = anv_queue_submit(queue, NULL, NULL, NULL, 0, NULL, NULL, 0,
+                                NULL, fence);
       goto out;
    }
 
@@ -931,6 +939,13 @@ VkResult anv_QueueSubmit(
       /* Fence for this submit.  NULL for all but the last one */
       VkFence submit_fence = (i == submitCount - 1) ? fence : VK_NULL_HANDLE;
 
+      const struct wsi_memory_signal_submit_info *mem_signal_info =
+         vk_find_struct_const(pSubmits[i].pNext,
+                              WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA);
+      struct anv_bo *wsi_signal_bo =
+         mem_signal_info && mem_signal_info->memory != VK_NULL_HANDLE ?
+         anv_device_memory_from_handle(mem_signal_info->memory)->bo : NULL;
+
       const VkTimelineSemaphoreSubmitInfoKHR *timeline_info =
          vk_find_struct_const(pSubmits[i].pNext,
                               TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR);
@@ -954,6 +969,7 @@ VkResult anv_QueueSubmit(
                                    pSubmits[i].pSignalSemaphores,
                                    signal_values,
                                    pSubmits[i].signalSemaphoreCount,
+                                   wsi_signal_bo,
                                    submit_fence);
          if (result != VK_SUCCESS)
             goto out;
@@ -992,7 +1008,7 @@ VkResult anv_QueueSubmit(
          result = anv_queue_submit(queue, cmd_buffer,
                                    in_semaphores, in_values, num_in_semaphores,
                                    out_semaphores, out_values, num_out_semaphores,
-                                   execbuf_fence);
+                                   wsi_signal_bo, execbuf_fence);
          if (result != VK_SUCCESS)
             goto out;
       }