anv: implement shareable timeline semaphores
author     Lionel Landwerlin <lionel.g.landwerlin@intel.com>
           Wed, 28 Aug 2019 10:22:30 +0000 (13:22 +0300)
committer  Marge Bot <eric+marge@anholt.net>
           Tue, 1 Sep 2020 16:40:11 +0000 (16:40 +0000)
This implements timeline semaphores using a new type of dma-fence
stored in drm-syncobjs. We use a submission thread to implement
delayed submissions.
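
For background, the sketch below (not part of this patch) shows the kernel
primitives the implementation builds on: DRM timeline syncobjs, per-point
payloads, and DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT to wait for a point's
dma-fence to materialize. It uses libdrm directly rather than anv's
anv_gem_* wrappers, and the device node path and point values are purely
illustrative.

    /* Standalone illustration of DRM timeline syncobjs using libdrm.
     * Build with: cc demo.c -o demo $(pkg-config --cflags --libs libdrm)
     */
    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <xf86drm.h>

    int main(void)
    {
       /* Illustrative render node; real code discovers it at runtime. */
       int fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC);
       if (fd < 0)
          return 1;

       uint32_t syncobj;
       if (drmSyncobjCreate(fd, 0, &syncobj))
          return 1;

       /* Signal point 1 from the CPU. On a real submission path the GPU
        * attaches a dma-fence to the point when the execbuf is queued.
        */
       uint64_t point = 1;
       drmSyncobjTimelineSignal(fd, &syncobj, &point, 1);

       /* Wait for point 1. WAIT_FOR_SUBMIT also waits for the point's
        * dma-fence to materialize, which is what the submission thread
        * relies on when one queue's execbuf depends on a point that
        * another queue has not created yet.
        */
       int ret = drmSyncobjTimelineWait(fd, &syncobj, &point, 1,
                                        INT64_MAX /* effectively wait forever */,
                                        DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
                                        DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
                                        NULL);
       printf("timeline wait returned %d\n", ret);

       drmSyncobjDestroy(fd, syncobj);
       return 0;
    }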

v2: Drop cloning of temporary semaphores and just transfer their ownership (Jason)
    Drain queue when dealing with binary semaphore
    Ensure we don't submit to the thread as long as we don't need to

v3: Use __u64 not uintptr_t for kernel pointers
    Fix commented code for INTEL_DEBUG=bat
    Set DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES in timeline fence execbuf extension
    Add new anv_queue_set_lost()
    Drop multi queue stuff meant for the fake multi queue patch
    Rework temporary syncobj handling
    Don't use syncobj when not available (DeviceWaitIdle/CreateDevice)
    Use ANV_MULTIALLOC
    And a few more tweaks...

v4: Drop drained condition helper (Lionel)
    Fix missing EXEC_OBJECT_WRITE on BOs we want to wait on (Jason)

v5: Add missing device->lost_reported in _anv_device_report_lost (Lionel)
    Fix missing free on submit->simple_bo (Lionel)
    Don't drop setting the device in lost state on QueueSubmit error (Jason)
    Store submit->fence_bos as an array of uintptr_t (Jason)

v6: condition device->has_thread_submit to i915 & core DRM support (Jason)

v7: Fix submit->in_fence leakage on error (Jason)
    Keep dummy semaphore with no thread submission (Jason)

v8: Move ownership of submit->out_fence to submit (Jason)

v9: Don't forget to read the VkFence's syncobj binary payload (Lionel)

v10: Take the mutex lock on anv_gem_close() (Jason/Lionel)

v11: Fix void* -> u64 cast on 32bit (Lionel)

v12: Rebase after BO backed timeline semaphore (Lionel)

v13: Fix missing snippets lost after rebase (Lionel)

v14: Drop update_binary usage (Lionel)

v15: Use ANV_MULTIALLOC (Lionel)

v16: Fix some realloc issues (Ivan)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> (v8)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2901>

src/intel/vulkan/anv_batch_chain.c
src/intel/vulkan/anv_device.c
src/intel/vulkan/anv_private.h
src/intel/vulkan/anv_queue.c
src/intel/vulkan/anv_wsi.c

diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c
index 21cead1df7b16b6884a826927eb7e872f4525c79..4aefa58ea1eaa45605cc5f5712265a4edeb45d8e 100644
@@ -1091,6 +1091,8 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
 struct anv_execbuf {
    struct drm_i915_gem_execbuffer2           execbuf;
 
+   struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
+
    struct drm_i915_gem_exec_object2 *        objects;
    uint32_t                                  bo_count;
    struct anv_bo **                          bos;
@@ -1119,6 +1121,24 @@ anv_execbuf_finish(struct anv_execbuf *exec)
    vk_free(exec->alloc, exec->bos);
 }
 
+static void
+anv_execbuf_add_ext(struct anv_execbuf *exec,
+                    uint32_t ext_name,
+                    struct i915_user_extension *ext)
+{
+   __u64 *iter = &exec->execbuf.cliprects_ptr;
+
+   exec->execbuf.flags |= I915_EXEC_USE_EXTENSIONS;
+
+   while (*iter != 0) {
+      iter = (__u64 *) &((struct i915_user_extension *)(uintptr_t)*iter)->next_extension;
+   }
+
+   ext->name = ext_name;
+
+   *iter = (uintptr_t) ext;
+}
+
 static VkResult
 anv_execbuf_add_bo_bitset(struct anv_device *device,
                           struct anv_execbuf *exec,
@@ -1754,18 +1774,30 @@ anv_queue_execbuf_locked(struct anv_queue *queue,
 
    if (submit->fence_count > 0) {
       assert(device->physical->has_syncobj);
-      execbuf.execbuf.flags |= I915_EXEC_FENCE_ARRAY;
-      execbuf.execbuf.num_cliprects = submit->fence_count;
-      execbuf.execbuf.cliprects_ptr = (uintptr_t)submit->fences;
+      if (device->has_thread_submit) {
+         execbuf.timeline_fences.fence_count = submit->fence_count;
+         execbuf.timeline_fences.handles_ptr = (uintptr_t)submit->fences;
+         execbuf.timeline_fences.values_ptr = (uintptr_t)submit->fence_values;
+         anv_execbuf_add_ext(&execbuf,
+                             DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
+                             &execbuf.timeline_fences.base);
+      } else {
+         execbuf.execbuf.flags |= I915_EXEC_FENCE_ARRAY;
+         execbuf.execbuf.num_cliprects = submit->fence_count;
+         execbuf.execbuf.cliprects_ptr = (uintptr_t)submit->fences;
+      }
    }
 
    if (submit->in_fence != -1) {
+      assert(!device->has_thread_submit);
       execbuf.execbuf.flags |= I915_EXEC_FENCE_IN;
       execbuf.execbuf.rsvd2 |= (uint32_t)submit->in_fence;
    }
 
-   if (submit->need_out_fence)
+   if (submit->need_out_fence) {
+      assert(!device->has_thread_submit);
       execbuf.execbuf.flags |= I915_EXEC_FENCE_OUT;
+   }
 
    if (has_perf_query) {
       struct anv_query_pool *query_pool = submit->cmd_buffer->perf_query_pool;
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 55d079e133f53c021996df810684951a0c3fa5e7..be4d1909d2e6971880c89b87be294ac040c3c31b 100644
@@ -460,6 +460,9 @@ anv_physical_device_try_create(struct anv_instance *instance,
    if (env_var_as_boolean("ANV_QUEUE_THREAD_DISABLE", false))
       device->has_exec_timeline = false;
 
+   device->has_thread_submit =
+      device->has_syncobj_wait_available && device->has_exec_timeline;
+
    device->always_use_bindless =
       env_var_as_boolean("ANV_ALWAYS_BINDLESS", false);
 
@@ -2821,6 +2824,8 @@ VkResult anv_CreateDevice(
       goto fail_fd;
    }
 
+   device->has_thread_submit = physical_device->has_thread_submit;
+
    result = anv_queue_init(device, &device->queue);
    if (result != VK_SUCCESS)
       goto fail_context_id;
@@ -3111,12 +3116,12 @@ void anv_DestroyDevice(
    if (!device)
       return;
 
+   anv_queue_finish(&device->queue);
+
    anv_device_finish_blorp(device);
 
    anv_pipeline_cache_finish(&device->default_pipeline_cache);
 
-   anv_queue_finish(&device->queue);
-
 #ifdef HAVE_VALGRIND
    /* We only need to free these to prevent valgrind errors.  The backing
     * BO will go away in a couple of lines so we don't actually leak.
@@ -3228,6 +3233,22 @@ void anv_GetDeviceQueue2(
       *pQueue = NULL;
 }
 
+void
+_anv_device_report_lost(struct anv_device *device)
+{
+   assert(p_atomic_read(&device->_lost) > 0);
+
+   device->lost_reported = true;
+
+   struct anv_queue *queue = &device->queue;
+
+   __vk_errorf(device->physical->instance, device,
+               VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT,
+               VK_ERROR_DEVICE_LOST,
+               queue->error_file, queue->error_line,
+               "%s", queue->error_msg);
+}
+
 VkResult
 _anv_device_set_lost(struct anv_device *device,
                      const char *file, int line,
@@ -3236,7 +3257,11 @@ _anv_device_set_lost(struct anv_device *device,
    VkResult err;
    va_list ap;
 
+   if (p_atomic_read(&device->_lost) > 0)
+      return VK_ERROR_DEVICE_LOST;
+
    p_atomic_inc(&device->_lost);
+   device->lost_reported = true;
 
    va_start(ap, msg);
    err = __vk_errorv(device->physical->instance, device,
@@ -3252,24 +3277,29 @@ _anv_device_set_lost(struct anv_device *device,
 
 VkResult
 _anv_queue_set_lost(struct anv_queue *queue,
-                    const char *file, int line,
-                    const char *msg, ...)
+                     const char *file, int line,
+                     const char *msg, ...)
 {
-   VkResult err;
    va_list ap;
 
-   p_atomic_inc(&queue->device->_lost);
+   if (queue->lost)
+      return VK_ERROR_DEVICE_LOST;
 
+   queue->lost = true;
+
+   queue->error_file = file;
+   queue->error_line = line;
    va_start(ap, msg);
-   err = __vk_errorv(queue->device->physical->instance, queue->device,
-                     VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT,
-                     VK_ERROR_DEVICE_LOST, file, line, msg, ap);
+   vsnprintf(queue->error_msg, sizeof(queue->error_msg),
+             msg, ap);
    va_end(ap);
 
+   p_atomic_inc(&queue->device->_lost);
+
    if (env_var_as_boolean("ANV_ABORT_ON_DEVICE_LOSS", false))
       abort();
 
-   return err;
+   return VK_ERROR_DEVICE_LOST;
 }
 
 VkResult
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 54b1c730791bbd3eb6ed25a86ed61b9c8f4dd181..46f64cfd8b2bacb877f29a539250c51f932a904c 100644
@@ -1082,6 +1082,7 @@ struct anv_physical_device {
     bool                                        has_syncobj_wait_available;
     bool                                        has_context_priority;
     bool                                        has_context_isolation;
+    bool                                        has_thread_submit;
     bool                                        has_mem_available;
     bool                                        has_mmap_offset;
     uint64_t                                    gtt_size;
@@ -1183,6 +1184,7 @@ struct anv_queue_submit {
    uint32_t                                  fence_count;
    uint32_t                                  fence_array_length;
    struct drm_i915_gem_exec_fence *          fences;
+   uint64_t *                                fence_values;
 
    uint32_t                                  temporary_semaphore_count;
    uint32_t                                  temporary_semaphore_array_length;
@@ -1194,7 +1196,10 @@ struct anv_queue_submit {
    uint32_t                                  sync_fd_semaphore_array_length;
 
    /* Allocated only with non shareable timelines. */
-   struct anv_timeline **                    wait_timelines;
+   union {
+      struct anv_timeline **                 wait_timelines;
+      uint32_t *                             wait_timeline_syncobjs;
+   };
    uint32_t                                  wait_timeline_count;
    uint32_t                                  wait_timeline_array_length;
    uint64_t *                                wait_timeline_values;
@@ -1229,14 +1234,34 @@ struct anv_queue_submit {
 struct anv_queue {
     struct vk_object_base                       base;
 
-    struct anv_device *                         device;
+   struct anv_device *                       device;
 
-    /*
-     * A list of struct anv_queue_submit to be submitted to i915.
-     */
-    struct list_head                            queued_submits;
+   VkDeviceQueueCreateFlags                  flags;
+
+   /* Set once from the device api calls. */
+   bool                                      lost_signaled;
+
+   /* Only set once atomically by the queue */
+   int                                       lost;
+   int                                       error_line;
+   const char *                              error_file;
+   char                                      error_msg[80];
+
+   /*
+    * This mutex protects the variables below.
+    */
+   pthread_mutex_t                           mutex;
+
+   pthread_t                                 thread;
+   pthread_cond_t                            cond;
+
+   /*
+    * A list of struct anv_queue_submit to be submitted to i915.
+    */
+   struct list_head                          queued_submits;
 
-    VkDeviceQueueCreateFlags                    flags;
+   /* Set to true to stop the submission thread */
+   bool                                      quit;
 };
 
 struct anv_pipeline_cache {
@@ -1330,6 +1355,7 @@ struct anv_device {
     int                                         fd;
     bool                                        can_chain_batches;
     bool                                        robust_buffer_access;
+    bool                                        has_thread_submit;
     struct anv_device_extension_table           enabled_extensions;
     struct anv_device_dispatch_table            dispatch;
 
@@ -1382,6 +1408,7 @@ struct anv_device {
     pthread_mutex_t                             mutex;
     pthread_cond_t                              queue_submit;
     int                                         _lost;
+    int                                         lost_reported;
 
     struct gen_batch_decode_ctx                 decoder_ctx;
     /*
@@ -1439,7 +1466,7 @@ anv_mocs_for_bo(const struct anv_device *device, const struct anv_bo *bo)
 void anv_device_init_blorp(struct anv_device *device);
 void anv_device_finish_blorp(struct anv_device *device);
 
-void _anv_device_set_all_queue_lost(struct anv_device *device);
+void _anv_device_report_lost(struct anv_device *device);
 VkResult _anv_device_set_lost(struct anv_device *device,
                               const char *file, int line,
                               const char *msg, ...)
@@ -1451,12 +1478,17 @@ VkResult _anv_queue_set_lost(struct anv_queue *queue,
 #define anv_device_set_lost(dev, ...) \
    _anv_device_set_lost(dev, __FILE__, __LINE__, __VA_ARGS__)
 #define anv_queue_set_lost(queue, ...) \
-   _anv_queue_set_lost(queue, __FILE__, __LINE__, __VA_ARGS__)
+   (queue)->device->has_thread_submit ? \
+   _anv_queue_set_lost(queue, __FILE__, __LINE__, __VA_ARGS__) : \
+   _anv_device_set_lost(queue->device, __FILE__, __LINE__, __VA_ARGS__)
 
 static inline bool
 anv_device_is_lost(struct anv_device *device)
 {
-   return unlikely(p_atomic_read(&device->_lost));
+   int lost = p_atomic_read(&device->_lost);
+   if (unlikely(lost && !device->lost_reported))
+      _anv_device_report_lost(device);
+   return lost;
 }
 
 VkResult anv_device_query_status(struct anv_device *device);
@@ -3176,6 +3208,7 @@ enum anv_semaphore_type {
    ANV_SEMAPHORE_TYPE_SYNC_FILE,
    ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ,
    ANV_SEMAPHORE_TYPE_TIMELINE,
+   ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE,
 };
 
 struct anv_timeline_point {
diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c
index 25646d07f1a9f94091ea72e9b71bf7f9271d96e4..fdf10f2c01266dc6013ccb90f2ec1994a34fb31a 100644
@@ -95,11 +95,16 @@ anv_queue_submit_free(struct anv_device *device,
    for (uint32_t i = 0; i < submit->sync_fd_semaphore_count; i++)
       anv_semaphore_unref(device, submit->sync_fd_semaphores[i]);
    /* Execbuf does not consume the in_fence.  It's our job to close it. */
-   if (submit->in_fence != -1)
+   if (submit->in_fence != -1) {
+      assert(!device->has_thread_submit);
       close(submit->in_fence);
-   if (submit->out_fence != -1)
+   }
+   if (submit->out_fence != -1) {
+      assert(!device->has_thread_submit);
       close(submit->out_fence);
+   }
    vk_free(alloc, submit->fences);
+   vk_free(alloc, submit->fence_values);
    vk_free(alloc, submit->temporary_semaphores);
    vk_free(alloc, submit->wait_timelines);
    vk_free(alloc, submit->wait_timeline_values);
@@ -349,6 +354,98 @@ anv_device_submit_deferred_locked(struct anv_device *device)
    return anv_queue_submit_deferred_locked(&device->queue, &advance);
 }
 
+static void
+anv_queue_submit_signal_fences(struct anv_device *device,
+                               struct anv_queue_submit *submit)
+{
+   for (uint32_t i = 0; i < submit->fence_count; i++) {
+      if (submit->fences[i].flags & I915_EXEC_FENCE_SIGNAL) {
+         anv_gem_syncobj_timeline_signal(device, &submit->fences[i].handle,
+                                         &submit->fence_values[i], 1);
+      }
+   }
+}
+
+static void *
+anv_queue_task(void *_queue)
+{
+   struct anv_queue *queue = _queue;
+
+   pthread_mutex_lock(&queue->mutex);
+
+   while (!queue->quit) {
+      while (!list_is_empty(&queue->queued_submits)) {
+         struct anv_queue_submit *submit =
+            list_first_entry(&queue->queued_submits, struct anv_queue_submit, link);
+         list_del(&submit->link);
+
+         pthread_mutex_unlock(&queue->mutex);
+
+         VkResult result = VK_ERROR_DEVICE_LOST;
+
+         /* Wait for timeline points to materialize before submitting. We need
+          * to do this because we're using threads to do the submit to i915.
+          * We could end up in a situation where the application submits to 2
+          * queues with the first submit creating the dma-fence for the
+          * second. But because the scheduling of the submission threads might
+          * wake up the second queue thread first, this would make that execbuf
+          * fail because the dma-fence it depends on hasn't materialized yet.
+          */
+         if (!queue->lost && submit->wait_timeline_count > 0) {
+            int ret = queue->device->no_hw ? 0 :
+               anv_gem_syncobj_timeline_wait(
+                  queue->device, submit->wait_timeline_syncobjs,
+                  submit->wait_timeline_values, submit->wait_timeline_count,
+                  anv_get_absolute_timeout(UINT64_MAX) /* wait forever */,
+                  true /* wait for all */, true /* wait for materialize */);
+            if (ret) {
+               result = anv_queue_set_lost(queue, "timeline timeout: %s",
+                                           strerror(errno));
+            }
+         }
+
+         /* Now submit */
+         if (!queue->lost) {
+            pthread_mutex_lock(&queue->device->mutex);
+            result = anv_queue_execbuf_locked(queue, submit);
+            pthread_mutex_unlock(&queue->device->mutex);
+         }
+
+         for (uint32_t i = 0; i < submit->sync_fd_semaphore_count; i++) {
+            struct anv_semaphore *semaphore = submit->sync_fd_semaphores[i];
+            /* Out fences can't have temporary state because that would imply
+             * that we imported a sync file and are trying to signal it.
+             */
+            assert(semaphore->temporary.type == ANV_SEMAPHORE_TYPE_NONE);
+            struct anv_semaphore_impl *impl = &semaphore->permanent;
+
+            assert(impl->type == ANV_SEMAPHORE_TYPE_SYNC_FILE);
+            impl->fd = dup(submit->out_fence);
+         }
+
+         if (result != VK_SUCCESS) {
+            /* vkQueueSubmit or some other entry point will report the
+             * DEVICE_LOST error at some point, but until we have emptied our
+             * list of execbufs we need to wake up all the potential waiters
+             * until one of them spots the error.
+             */
+            anv_queue_submit_signal_fences(queue->device, submit);
+         }
+
+         anv_queue_submit_free(queue->device, submit);
+
+         pthread_mutex_lock(&queue->mutex);
+      }
+
+      if (!queue->quit)
+         pthread_cond_wait(&queue->cond, &queue->mutex);
+   }
+
+   pthread_mutex_unlock(&queue->mutex);
+
+   return NULL;
+}
+
 static VkResult
 _anv_queue_submit(struct anv_queue *queue, struct anv_queue_submit **_submit,
                   bool flush_queue)
@@ -360,42 +457,92 @@ _anv_queue_submit(struct anv_queue *queue, struct anv_queue_submit **_submit,
     * anv_queue.
     */
    *_submit = NULL;
+   if (queue->device->has_thread_submit) {
+      pthread_mutex_lock(&queue->mutex);
+      pthread_cond_broadcast(&queue->cond);
+      list_addtail(&submit->link, &queue->queued_submits);
+      pthread_mutex_unlock(&queue->mutex);
+      return VK_SUCCESS;
+   } else {
+      pthread_mutex_lock(&queue->device->mutex);
+      list_addtail(&submit->link, &queue->queued_submits);
+      VkResult result = anv_device_submit_deferred_locked(queue->device);
+      if (flush_queue) {
+         while (result == VK_SUCCESS && !list_is_empty(&queue->queued_submits)) {
+            int ret = pthread_cond_wait(&queue->device->queue_submit,
+                                        &queue->device->mutex);
+            if (ret != 0) {
+               result = anv_device_set_lost(queue->device, "wait timeout");
+               break;
+            }
 
-   pthread_mutex_lock(&queue->device->mutex);
-   list_addtail(&submit->link, &queue->queued_submits);
-   VkResult result = anv_device_submit_deferred_locked(queue->device);
-   if (flush_queue) {
-      while (result == VK_SUCCESS && !list_is_empty(&queue->queued_submits)) {
-         int ret = pthread_cond_wait(&queue->device->queue_submit,
-                                     &queue->device->mutex);
-         if (ret != 0) {
-            result = anv_device_set_lost(queue->device, "wait timeout");
-            break;
+            result = anv_device_submit_deferred_locked(queue->device);
          }
-
-         result = anv_device_submit_deferred_locked(queue->device);
       }
+      pthread_mutex_unlock(&queue->device->mutex);
+      return result;
    }
-   pthread_mutex_unlock(&queue->device->mutex);
-   return result;
 }
 
 VkResult
 anv_queue_init(struct anv_device *device, struct anv_queue *queue)
 {
-   vk_object_base_init(&device->vk, &queue->base, VK_OBJECT_TYPE_QUEUE);
+   VkResult result;
+
    queue->device = device;
    queue->flags = 0;
+   queue->lost = false;
+   queue->quit = false;
 
    list_inithead(&queue->queued_submits);
 
+   /* We only need the additional thread/mutex when using a thread for
+    * submission.
+    */
+   if (device->has_thread_submit) {
+      if (pthread_mutex_init(&queue->mutex, NULL) != 0)
+         return vk_error(VK_ERROR_INITIALIZATION_FAILED);
+
+      if (pthread_cond_init(&queue->cond, NULL) != 0) {
+         result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
+         goto fail_mutex;
+      }
+      if (pthread_create(&queue->thread, NULL, anv_queue_task, queue)) {
+         result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
+         goto fail_cond;
+      }
+   }
+
+   vk_object_base_init(&device->vk, &queue->base, VK_OBJECT_TYPE_QUEUE);
+
    return VK_SUCCESS;
+
+ fail_cond:
+   pthread_cond_destroy(&queue->cond);
+ fail_mutex:
+   pthread_mutex_destroy(&queue->mutex);
+
+   return result;
 }
 
 void
 anv_queue_finish(struct anv_queue *queue)
 {
    vk_object_base_finish(&queue->base);
+
+   if (!queue->device->has_thread_submit)
+      return;
+
+   pthread_mutex_lock(&queue->mutex);
+   pthread_cond_broadcast(&queue->cond);
+   queue->quit = true;
+   pthread_mutex_unlock(&queue->mutex);
+
+   void *ret;
+   pthread_join(queue->thread, &ret);
+
+   pthread_cond_destroy(&queue->cond);
+   pthread_mutex_destroy(&queue->mutex);
 }
 
 static VkResult
@@ -427,10 +574,42 @@ anv_queue_submit_add_fence_bo(struct anv_queue_submit *submit,
 static VkResult
 anv_queue_submit_add_syncobj(struct anv_queue_submit* submit,
                              struct anv_device *device,
-                             uint32_t handle, uint32_t flags)
+                             uint32_t handle, uint32_t flags,
+                             uint64_t value)
 {
    assert(flags != 0);
 
+   if (device->has_thread_submit && (flags & I915_EXEC_FENCE_WAIT)) {
+      if (submit->wait_timeline_count >= submit->wait_timeline_array_length) {
+         uint32_t new_len = MAX2(submit->wait_timeline_array_length * 2, 64);
+
+         uint32_t *new_wait_timeline_syncobjs =
+            vk_realloc(submit->alloc,
+                       submit->wait_timeline_syncobjs,
+                       new_len * sizeof(*submit->wait_timeline_syncobjs),
+                       8, submit->alloc_scope);
+         if (new_wait_timeline_syncobjs == NULL)
+            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+         submit->wait_timeline_syncobjs = new_wait_timeline_syncobjs;
+
+         uint64_t *new_wait_timeline_values =
+            vk_realloc(submit->alloc,
+                       submit->wait_timeline_values, new_len * sizeof(*submit->wait_timeline_values),
+                       8, submit->alloc_scope);
+         if (new_wait_timeline_values == NULL)
+            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+         submit->wait_timeline_values = new_wait_timeline_values;
+         submit->wait_timeline_array_length = new_len;
+      }
+
+      submit->wait_timeline_syncobjs[submit->wait_timeline_count] = handle;
+      submit->wait_timeline_values[submit->wait_timeline_count] = value;
+
+      submit->wait_timeline_count++;
+   }
+
    if (submit->fence_count >= submit->fence_array_length) {
       uint32_t new_len = MAX2(submit->fence_array_length * 2, 64);
       struct drm_i915_gem_exec_fence *new_fences =
@@ -441,13 +620,24 @@ anv_queue_submit_add_syncobj(struct anv_queue_submit* submit,
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
       submit->fences = new_fences;
+
+      uint64_t *new_fence_values =
+         vk_realloc(submit->alloc,
+                    submit->fence_values, new_len * sizeof(*submit->fence_values),
+                    8, submit->alloc_scope);
+      if (new_fence_values == NULL)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+      submit->fence_values = new_fence_values;
       submit->fence_array_length = new_len;
    }
 
-   submit->fences[submit->fence_count++] = (struct drm_i915_gem_exec_fence) {
+   submit->fences[submit->fence_count] = (struct drm_i915_gem_exec_fence) {
       .handle = handle,
       .flags = flags,
    };
+   submit->fence_values[submit->fence_count] = value;
+   submit->fence_count++;
 
    return VK_SUCCESS;
 }
@@ -595,7 +785,7 @@ anv_queue_submit_simple_batch(struct anv_queue *queue,
       }
 
       result = anv_queue_submit_add_syncobj(submit, device, syncobj,
-                                            I915_EXEC_FENCE_SIGNAL);
+                                            I915_EXEC_FENCE_SIGNAL, 0);
    } else {
       result = anv_device_alloc_bo(device, 4096,
                                    ANV_BO_ALLOC_EXTERNAL |
@@ -742,7 +932,6 @@ anv_queue_submit(struct anv_queue *queue,
    submit->cmd_buffer = cmd_buffer;
 
    VkResult result = VK_SUCCESS;
-
    for (uint32_t i = 0; i < num_in_semaphores; i++) {
       ANV_FROM_HANDLE(anv_semaphore, semaphore, in_semaphores[i]);
       struct anv_semaphore_impl *impl;
@@ -796,7 +985,8 @@ anv_queue_submit(struct anv_queue *queue,
       case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ: {
          result = anv_queue_submit_add_syncobj(submit, device,
                                                impl->syncobj,
-                                               I915_EXEC_FENCE_WAIT);
+                                               I915_EXEC_FENCE_WAIT,
+                                               0);
          if (result != VK_SUCCESS)
             goto error;
          break;
@@ -810,6 +1000,15 @@ anv_queue_submit(struct anv_queue *queue,
             goto error;
          break;
 
+      case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
+         result = anv_queue_submit_add_syncobj(submit, device,
+                                               impl->syncobj,
+                                               I915_EXEC_FENCE_WAIT,
+                                               in_values ? in_values[i] : 0);
+         if (result != VK_SUCCESS)
+            goto error;
+         break;
+
       default:
          break;
       }
@@ -850,7 +1049,8 @@ anv_queue_submit(struct anv_queue *queue,
 
       case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ: {
          result = anv_queue_submit_add_syncobj(submit, device, impl->syncobj,
-                                               I915_EXEC_FENCE_SIGNAL);
+                                               I915_EXEC_FENCE_SIGNAL,
+                                               0);
          if (result != VK_SUCCESS)
             goto error;
          break;
@@ -864,6 +1064,14 @@ anv_queue_submit(struct anv_queue *queue,
             goto error;
          break;
 
+      case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
+         result = anv_queue_submit_add_syncobj(submit, device, impl->syncobj,
+                                               I915_EXEC_FENCE_SIGNAL,
+                                               out_values ? out_values[i] : 0);
+         if (result != VK_SUCCESS)
+            goto error;
+         break;
+
       default:
          break;
       }
@@ -893,6 +1101,7 @@ anv_queue_submit(struct anv_queue *queue,
 
       switch (impl->type) {
       case ANV_FENCE_TYPE_BO:
+         assert(!device->has_thread_submit);
          result = anv_queue_submit_add_fence_bo(submit, impl->bo.bo, true /* signal */);
          if (result != VK_SUCCESS)
             goto error;
@@ -904,8 +1113,11 @@ anv_queue_submit(struct anv_queue *queue,
           * also reset the fence's syncobj so that they don't contain a
           * signaled dma-fence.
           */
+         anv_gem_syncobj_reset(device, impl->syncobj);
+
          result = anv_queue_submit_add_syncobj(submit, device, impl->syncobj,
-                                               I915_EXEC_FENCE_SIGNAL);
+                                               I915_EXEC_FENCE_SIGNAL,
+                                               0);
          if (result != VK_SUCCESS)
             goto error;
          break;
@@ -921,6 +1133,7 @@ anv_queue_submit(struct anv_queue *queue,
       goto error;
 
    if (fence && fence->permanent.type == ANV_FENCE_TYPE_BO) {
+      assert(!device->has_thread_submit);
       /* If we have permanent BO fence, the only type of temporary possible
        * would be BO_WSI (because BO fences are not shareable). The Vulkan spec
        * also requires that the fence passed to vkQueueSubmit() be :
@@ -1291,16 +1504,34 @@ VkResult anv_GetFenceStatus(
       }
 
    case ANV_FENCE_TYPE_SYNCOBJ: {
-      int ret = anv_gem_syncobj_wait(device, &impl->syncobj, 1, 0, true);
-      if (ret == -1) {
-         if (errno == ETIME) {
-            return VK_NOT_READY;
+      if (device->has_thread_submit) {
+         uint64_t binary_value = 0;
+         int ret = anv_gem_syncobj_timeline_wait(device, &impl->syncobj,
+                                             &binary_value, 1, 0,
+                                             true /* wait_all */,
+                                             false /* wait_materialize */);
+         if (ret == -1) {
+            if (errno == ETIME) {
+               return VK_NOT_READY;
+            } else {
+               /* We don't know the real error. */
+               return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
+            }
          } else {
-            /* We don't know the real error. */
-            return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
+            return VK_SUCCESS;
          }
       } else {
-         return VK_SUCCESS;
+         int ret = anv_gem_syncobj_wait(device, &impl->syncobj, 1, 0, false);
+         if (ret == -1) {
+            if (errno == ETIME) {
+               return VK_NOT_READY;
+            } else {
+               /* We don't know the real error. */
+               return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
+            }
+         } else {
+            return VK_SUCCESS;
+         }
       }
    }
 
@@ -1334,11 +1565,11 @@ anv_wait_for_syncobj_fences(struct anv_device *device,
       syncobjs[i] = impl->syncobj;
    }
 
+   int ret = 0;
    /* The gem_syncobj_wait ioctl may return early due to an inherent
-    * limitation in the way it computes timeouts.  Loop until we've actually
+    * limitation in the way it computes timeouts. Loop until we've actually
     * passed the timeout.
     */
-   int ret;
    do {
       ret = anv_gem_syncobj_wait(device, syncobjs, fenceCount,
                                  abs_timeout_ns, waitAll);
@@ -1496,6 +1727,8 @@ anv_wait_for_fences(struct anv_device *device,
 
          switch (impl->type) {
          case ANV_FENCE_TYPE_BO:
+            assert(!device->physical->has_syncobj_wait);
+            /* fall-through */
          case ANV_FENCE_TYPE_WSI_BO:
             result = anv_wait_for_bo_fences(device, 1, &pFences[i],
                                             true, abs_timeout);
@@ -1695,6 +1928,31 @@ VkResult anv_ImportFenceFdKHR(
    return VK_SUCCESS;
 }
 
+/* The sideband payload of the DRM syncobj was incremented when the
+ * application called vkQueueSubmit(). Here we wait for a fence with the same
+ * value to materialize so that we can export it (typically as a SyncFD).
+ */
+static VkResult
+wait_syncobj_materialize(struct anv_device *device,
+                         uint32_t syncobj,
+                         int *fd)
+{
+   if (!device->has_thread_submit)
+      return VK_SUCCESS;
+
+   uint64_t binary_value = 0;
+   /* We might need to wait until the fence materializes before we can
+    * export to a sync FD when we use a thread for submission.
+    */
+   if (anv_gem_syncobj_timeline_wait(device, &syncobj, &binary_value, 1,
+                                     anv_get_absolute_timeout(5ull * NSEC_PER_SEC),
+                                     true /* wait_all */,
+                                     true /* wait_materialize */))
+      return anv_device_set_lost(device, "anv_gem_syncobj_timeline_wait failed: %m");
+
+   return VK_SUCCESS;
+}
+
 VkResult anv_GetFenceFdKHR(
     VkDevice                                    _device,
     const VkFenceGetFdInfoKHR*                  pGetFdInfo,
@@ -1721,6 +1979,10 @@ VkResult anv_GetFenceFdKHR(
    }
 
    case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: {
+      VkResult result = wait_syncobj_materialize(device, impl->syncobj, pFd);
+      if (result != VK_SUCCESS)
+         return result;
+
       int fd = anv_gem_syncobj_export_sync_file(device, impl->syncobj);
       if (fd < 0)
          return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
@@ -1794,8 +2056,24 @@ timeline_semaphore_create(struct anv_device *device,
                           struct anv_semaphore_impl *impl,
                           uint64_t initial_value)
 {
-   impl->type = ANV_SEMAPHORE_TYPE_TIMELINE;
-   anv_timeline_init(device, &impl->timeline, initial_value);
+   if (device->has_thread_submit) {
+      impl->type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE;
+      impl->syncobj = anv_gem_syncobj_create(device, 0);
+      if (!impl->syncobj)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      if (initial_value) {
+         if (anv_gem_syncobj_timeline_signal(device,
+                                             &impl->syncobj,
+                                             &initial_value, 1)) {
+            anv_gem_syncobj_destroy(device, impl->syncobj);
+            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+         }
+      }
+   } else {
+      impl->type = ANV_SEMAPHORE_TYPE_TIMELINE;
+      anv_timeline_init(device, &impl->timeline, initial_value);
+   }
+
    return VK_SUCCESS;
 }
 
@@ -1824,7 +2102,7 @@ VkResult anv_CreateSemaphore(
 
    const VkExportSemaphoreCreateInfo *export =
       vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO);
-    VkExternalSemaphoreHandleTypeFlags handleTypes =
+   VkExternalSemaphoreHandleTypeFlags handleTypes =
       export ? export->handleTypes : 0;
    VkResult result;
 
@@ -1839,8 +2117,10 @@ VkResult anv_CreateSemaphore(
       }
    } else if (handleTypes & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
       assert(handleTypes == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
-      assert(sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR);
-      result = binary_semaphore_create(device, &semaphore->permanent, true);
+      if (sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR)
+         result = binary_semaphore_create(device, &semaphore->permanent, true);
+      else
+         result = timeline_semaphore_create(device, &semaphore->permanent, timeline_value);
       if (result != VK_SUCCESS) {
          vk_free2(&device->vk.alloc, pAllocator, semaphore);
          return result;
@@ -1897,6 +2177,7 @@ anv_semaphore_impl_cleanup(struct anv_device *device,
       break;
 
    case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
+   case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
       anv_gem_syncobj_destroy(device, impl->syncobj);
       break;
 
@@ -1964,8 +2245,10 @@ void anv_GetPhysicalDeviceExternalSemaphoreProperties(
 
    switch (pExternalSemaphoreInfo->handleType) {
    case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
-      /* Timeline semaphores are not exportable. */
-      if (sem_type == VK_SEMAPHORE_TYPE_TIMELINE_KHR)
+      /* Timeline semaphores are not exportable, unless we have threaded
+       * submission.
+       */
+      if (sem_type == VK_SEMAPHORE_TYPE_TIMELINE_KHR && !device->has_thread_submit)
          break;
       pExternalSemaphoreProperties->exportFromImportedHandleTypes =
          VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
@@ -2014,7 +2297,15 @@ VkResult anv_ImportSemaphoreFdKHR(
    switch (pImportSemaphoreFdInfo->handleType) {
    case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
       if (device->physical->has_syncobj) {
-         new_impl.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
+         /* When importing non-temporarily, reuse the semaphore's existing
+          * type. The Linux/DRM implementation allows binary & timeline
+          * semaphores to be used interchangeably and we have no way to
+          * differentiate them.
+          */
+         if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT)
+            new_impl.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
+         else
+            new_impl.type = semaphore->permanent.type;
 
          new_impl.syncobj = anv_gem_syncobj_fd_to_handle(device, fd);
          if (!new_impl.syncobj)
@@ -2168,9 +2459,13 @@ VkResult anv_GetSemaphoreFdKHR(
    }
 
    case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
-      if (pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT)
+      if (pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
+         VkResult result = wait_syncobj_materialize(device, impl->syncobj, pFd);
+         if (result != VK_SUCCESS)
+            return result;
+
          fd = anv_gem_syncobj_export_sync_file(device, impl->syncobj);
-      else {
+      } else {
          assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
          fd = anv_gem_syncobj_handle_to_fd(device, impl->syncobj);
       }
@@ -2179,6 +2474,14 @@ VkResult anv_GetSemaphoreFdKHR(
       *pFd = fd;
       break;
 
+   case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
+      assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
+      fd = anv_gem_syncobj_handle_to_fd(device, impl->syncobj);
+      if (fd < 0)
+         return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
+      *pFd = fd;
+      break;
+
    default:
       return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
    }
@@ -2217,6 +2520,15 @@ VkResult anv_GetSemaphoreCounterValue(
       return VK_SUCCESS;
    }
 
+   case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE: {
+      int ret = anv_gem_syncobj_timeline_query(device, &impl->syncobj, pValue, 1);
+
+      if (ret != 0)
+         return anv_device_set_lost(device, "unable to query timeline syncobj");
+
+      return VK_SUCCESS;
+   }
+
    default:
       unreachable("Invalid semaphore type");
    }
@@ -2236,8 +2548,8 @@ anv_timeline_wait_locked(struct anv_device *device,
          .tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
       };
 
-      int ret = pthread_cond_timedwait(&device->queue_submit,
-                                       &device->mutex, &abstime);
+      UNUSED int ret = pthread_cond_timedwait(&device->queue_submit,
+                                              &device->mutex, &abstime);
       assert(ret != EINVAL);
       if (anv_gettime_ns() >= abs_timeout_ns &&
           timeline->highest_pending < serial)
@@ -2336,24 +2648,22 @@ VkResult anv_WaitSemaphores(
     uint64_t                                    timeout)
 {
    ANV_FROM_HANDLE(anv_device, device, _device);
+   uint32_t *handles;
+   struct anv_timeline **timelines;
+   uint64_t *values;
 
-   if (device->no_hw)
-      return VK_SUCCESS;
+   ANV_MULTIALLOC(ma);
 
-   struct anv_timeline **timelines =
-      vk_alloc(&device->vk.alloc,
-               pWaitInfo->semaphoreCount * sizeof(*timelines),
-               8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
-   if (!timelines)
-      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+   anv_multialloc_add(&ma, &values, pWaitInfo->semaphoreCount);
+   if (device->has_thread_submit) {
+      anv_multialloc_add(&ma, &handles, pWaitInfo->semaphoreCount);
+   } else {
+      anv_multialloc_add(&ma, &timelines, pWaitInfo->semaphoreCount);
+   }
 
-   uint64_t *values = vk_alloc(&device->vk.alloc,
-                               pWaitInfo->semaphoreCount * sizeof(*values),
-                               8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
-   if (!values) {
-      vk_free(&device->vk.alloc, timelines);
+   if (!anv_multialloc_alloc(&ma, &device->vk.alloc,
+                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND))
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-   }
 
    uint32_t handle_count = 0;
    for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; i++) {
@@ -2362,24 +2672,40 @@ VkResult anv_WaitSemaphores(
          semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
          &semaphore->temporary : &semaphore->permanent;
 
-      assert(impl->type == ANV_SEMAPHORE_TYPE_TIMELINE);
-
       if (pWaitInfo->pValues[i] == 0)
          continue;
 
-      timelines[handle_count] = &impl->timeline;
+      if (device->has_thread_submit) {
+         assert(impl->type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE);
+         handles[handle_count] = impl->syncobj;
+      } else {
+         assert(impl->type == ANV_SEMAPHORE_TYPE_TIMELINE);
+         timelines[handle_count] = &impl->timeline;
+      }
       values[handle_count] = pWaitInfo->pValues[i];
       handle_count++;
    }
 
    VkResult result = VK_SUCCESS;
    if (handle_count > 0) {
-      result = anv_timelines_wait(device, timelines, values, handle_count,
-                                  !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR),
-                                  anv_get_absolute_timeout(timeout));
+      if (device->has_thread_submit) {
+         int ret =
+            anv_gem_syncobj_timeline_wait(device,
+                                          handles, values, handle_count,
+                                          anv_get_absolute_timeout(timeout),
+                                          !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR),
+                                          false);
+         if (ret != 0)
+            result = errno == ETIME ? VK_TIMEOUT :
+               anv_device_set_lost(device, "unable to wait on timeline syncobj");
+      } else {
+         result =
+            anv_timelines_wait(device, timelines, values, handle_count,
+                               !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR),
+                               anv_get_absolute_timeout(timeout));
+      }
    }
 
-   vk_free(&device->vk.alloc, timelines);
    vk_free(&device->vk.alloc, values);
 
    return result;
@@ -2414,6 +2740,20 @@ VkResult anv_SignalSemaphore(
       return result;
    }
 
+   case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE: {
+      /* Timeline semaphores are created with a value of 0, so signaling on 0
+       * is a waste of time.
+       */
+      if (pSignalInfo->value == 0)
+         return VK_SUCCESS;
+
+      int ret = anv_gem_syncobj_timeline_signal(device, &impl->syncobj,
+                                                &pSignalInfo->value, 1);
+
+      return ret == 0 ? VK_SUCCESS :
+         anv_device_set_lost(device, "unable to signal timeline syncobj");
+   }
+
    default:
       unreachable("Invalid semaphore type");
    }
diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c
index 75bf4feadd3be30682b426290620f066e3806f59..cbe5bb0291484fa3b6db003d971b98dcebdf188f 100644
@@ -299,10 +299,62 @@ VkResult anv_QueuePresentKHR(
       }
    }
 
-   return wsi_common_queue_present(&queue->device->physical->wsi_device,
-                                   anv_device_to_handle(queue->device),
-                                   _queue, 0,
-                                   pPresentInfo);
+   if (device->has_thread_submit &&
+       pPresentInfo->waitSemaphoreCount > 0) {
+      /* Make sure all of the dependency semaphores have materialized when
+       * using a threaded submission.
+       */
+      uint32_t *syncobjs = vk_alloc(&device->vk.alloc,
+                                    sizeof(*syncobjs) * pPresentInfo->waitSemaphoreCount, 8,
+                                    VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+
+      if (!syncobjs)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+      uint32_t wait_count = 0;
+      for (uint32_t i = 0; i < pPresentInfo->waitSemaphoreCount; i++) {
+         ANV_FROM_HANDLE(anv_semaphore, semaphore, pPresentInfo->pWaitSemaphores[i]);
+         struct anv_semaphore_impl *impl =
+            semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
+            &semaphore->temporary : &semaphore->permanent;
+
+         if (impl->type == ANV_SEMAPHORE_TYPE_DUMMY)
+            continue;
+         assert(impl->type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ);
+         syncobjs[wait_count++] = impl->syncobj;
+      }
+
+      int ret = 0;
+      if (wait_count > 0) {
+         ret =
+            anv_gem_syncobj_wait(device, syncobjs, wait_count,
+                                 anv_get_absolute_timeout(INT64_MAX),
+                                 true /* wait_all */);
+      }
+
+      vk_free(&device->vk.alloc, syncobjs);
+
+      if (ret)
+         return vk_error(VK_ERROR_DEVICE_LOST);
+   }
+
+   VkResult result = wsi_common_queue_present(&device->physical->wsi_device,
+                                              anv_device_to_handle(queue->device),
+                                              _queue, 0,
+                                              pPresentInfo);
+
+   for (uint32_t i = 0; i < pPresentInfo->waitSemaphoreCount; i++) {
+      ANV_FROM_HANDLE(anv_semaphore, semaphore, pPresentInfo->pWaitSemaphores[i]);
+      /* From the Vulkan 1.0.53 spec:
+       *
+       *    "If the import is temporary, the implementation must restore the
+       *    semaphore to its prior permanent state after submitting the next
+       *    semaphore wait operation."
+       */
+      anv_semaphore_reset_temporary(queue->device, semaphore);
+   }
+
+   return result;
 }
 
 VkResult anv_GetDeviceGroupPresentCapabilitiesKHR(