intel/fs: Fix MOV_INDIRECT and BROADCAST of Q types on Gen11+

[mesa.git] / src / intel / vulkan / anv_queue.c
diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c

index 959b96aaaf2aeea1929915ccb16a386b0ae8e8e5..fdf10f2c01266dc6013ccb90f2ec1994a34fb31a 100644 (file)
--- a/src/intel/vulkan/anv_queue.c
+++ b/src/intel/vulkan/anv_queue.c
@@ -25,9 +25,12 @@
   * This file implements VkQueue, VkFence, and VkSemaphore
   */
  
+#include <errno.h>
  #include <fcntl.h>
  #include <unistd.h>
  
+#include "util/os_file.h"
+
  #include "anv_private.h"
  #include "vk_util.h"
  
@@ -92,23 +95,203 @@ anv_queue_submit_free(struct anv_device *device,
     for (uint32_t i = 0; i < submit->sync_fd_semaphore_count; i++)
        anv_semaphore_unref(device, submit->sync_fd_semaphores[i]);
     /* Execbuf does not consume the in_fence.  It's our job to close it. */
-   if (submit->in_fence != -1)
+   if (submit->in_fence != -1) {
+      assert(!device->has_thread_submit);
        close(submit->in_fence);
-   if (submit->out_fence != -1)
+   }
+   if (submit->out_fence != -1) {
+      assert(!device->has_thread_submit);
        close(submit->out_fence);
+   }
     vk_free(alloc, submit->fences);
+   vk_free(alloc, submit->fence_values);
     vk_free(alloc, submit->temporary_semaphores);
+   vk_free(alloc, submit->wait_timelines);
+   vk_free(alloc, submit->wait_timeline_values);
+   vk_free(alloc, submit->signal_timelines);
+   vk_free(alloc, submit->signal_timeline_values);
     vk_free(alloc, submit->fence_bos);
     vk_free(alloc, submit);
  }
  
+/* Tells whether a queued submission can be handed to the kernel.
+ *
+ * Returns false as soon as one of the timeline values the submission waits
+ * on is greater than the highest_pending value of its timeline, true
+ * otherwise (including when the submission has no timeline waits at all).
+ */
+static bool
+anv_queue_submit_ready_locked(struct anv_queue_submit *submit)
+{
+   const uint32_t wait_count = submit->wait_timeline_count;
+
+   for (uint32_t w = 0; w < wait_count; w++) {
+      const struct anv_timeline *timeline = submit->wait_timelines[w];
+
+      if (timeline->highest_pending < submit->wait_timeline_values[w])
+         return false;
+   }
+
+   return true;
+}
+
  static VkResult
-_anv_queue_submit(struct anv_queue *queue, struct anv_queue_submit **_submit)
+anv_timeline_init(struct anv_device *device,
+                  struct anv_timeline *timeline,
+                  uint64_t initial_value)
  {
-   struct anv_queue_submit *submit = *_submit;
-   VkResult result = anv_queue_execbuf(queue, submit);
+   timeline->highest_past =
+      timeline->highest_pending = initial_value;
+   list_inithead(&timeline->points);
+   list_inithead(&timeline->free_points);
+
+   return VK_SUCCESS;
+}
+
+/* Tears down a timeline: every point still linked on the free list or on
+ * the active list is unlinked, its BO is released and its memory is freed.
+ */
+static void
+anv_timeline_finish(struct anv_device *device,
+                    struct anv_timeline *timeline)
+{
+   /* Recycled (free) points are torn down first, then the active ones. */
+   struct list_head *const lists[] = {
+      &timeline->free_points,
+      &timeline->points,
+   };
+
+   for (unsigned l = 0; l < sizeof(lists) / sizeof(lists[0]); l++) {
+      list_for_each_entry_safe(struct anv_timeline_point, pt,
+                               lists[l], link) {
+         list_del(&pt->link);
+         anv_device_release_bo(device, pt->bo);
+         vk_free(&device->vk.alloc, pt);
+      }
+   }
+}
+
+static VkResult
+anv_timeline_add_point_locked(struct anv_device *device,
+                              struct anv_timeline *timeline,
+                              uint64_t value,
+                              struct anv_timeline_point **point)
+{
+   VkResult result = VK_SUCCESS;
+
+   if (list_is_empty(&timeline->free_points)) {
+      *point =
+         vk_zalloc(&device->vk.alloc, sizeof(**point),
+                   8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+      if (!(*point))
+         result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      if (result == VK_SUCCESS) {
+         result = anv_device_alloc_bo(device, 4096,
+                                      ANV_BO_ALLOC_EXTERNAL |
+                                      ANV_BO_ALLOC_IMPLICIT_SYNC,
+                                      0 /* explicit_address */,
+                                      &(*point)->bo);
+         if (result != VK_SUCCESS)
+            vk_free(&device->vk.alloc, *point);
+      }
+   } else {
+      *point = list_first_entry(&timeline->free_points,
+                                struct anv_timeline_point, link);
+      list_del(&(*point)->link);
+   }
  
     if (result == VK_SUCCESS) {
+      (*point)->serial = value;
+      list_addtail(&(*point)->link, &timeline->points);
+   }
+
+   return result;
+}
+
+/* Recycles the points of a timeline whose work has completed.
+ *
+ * Walks timeline->points in order and moves every point whose BO is no
+ * longer busy onto timeline->free_points, raising timeline->highest_past to
+ * the serial of each recycled point. The walk stops at the first point that
+ * has not been submitted yet, has a waiter, or is still busy.
+ *
+ * Returns VK_SUCCESS, or the error reported by anv_device_bo_busy().
+ */
+static VkResult
+anv_timeline_gc_locked(struct anv_device *device,
+                       struct anv_timeline *timeline)
+{
+   list_for_each_entry_safe(struct anv_timeline_point, point,
+                            &timeline->points, link) {
+      /* timeline->highest_pending is only incremented once submission has
+       * happened, so a point with a greater serial has not been submitted
+       * yet.
+       */
+      if (timeline->highest_pending < point->serial)
+         break;
+
+      /* A point somebody is waiting on is considered busy and is not
+       * recycled. It may well have stopped being busy by the time we look
+       * at it, but recycling it out from under a waiter can lead to weird
+       * races.
+       *
+       * The list is walked in order, so if this point is still busy so is
+       * every following one.
+       */
+      assert(point->waiting >= 0);
+      if (point->waiting != 0)
+         break;
+
+      /* Only signaled points can be garbage collected. */
+      const VkResult busy = anv_device_bo_busy(device, point->bo);
+
+      /* In-order walk again: a busy point means every later one is busy. */
+      if (busy == VK_NOT_READY)
+         break;
+
+      if (busy != VK_SUCCESS)
+         return busy;
+
+      assert(timeline->highest_past < point->serial);
+      timeline->highest_past = point->serial;
+
+      list_del(&point->link);
+      list_add(&point->link, &timeline->free_points);
+   }
+
+   return VK_SUCCESS;
+}
+
+static VkResult anv_queue_submit_add_fence_bo(struct anv_queue_submit *submit,
+                                              struct anv_bo *bo,
+                                              bool signal);
+
+static VkResult
+anv_queue_submit_timeline_locked(struct anv_queue *queue,
+                                 struct anv_queue_submit *submit)
+{
+   VkResult result;
+
+   for (uint32_t i = 0; i < submit->wait_timeline_count; i++) {
+      struct anv_timeline *timeline = submit->wait_timelines[i];
+      uint64_t wait_value = submit->wait_timeline_values[i];
+
+      if (timeline->highest_past >= wait_value)
+         continue;
+
+      list_for_each_entry(struct anv_timeline_point, point, &timeline->points, link) {
+         if (point->serial < wait_value)
+            continue;
+         result = anv_queue_submit_add_fence_bo(submit, point->bo, false);
+         if (result != VK_SUCCESS)
+            return result;
+         break;
+      }
+   }
+   for (uint32_t i = 0; i < submit->signal_timeline_count; i++) {
+      struct anv_timeline *timeline = submit->signal_timelines[i];
+      uint64_t signal_value = submit->signal_timeline_values[i];
+      struct anv_timeline_point *point;
+
+      result = anv_timeline_add_point_locked(queue->device, timeline,
+                                             signal_value, &point);
+      if (result != VK_SUCCESS)
+         return result;
+
+      result = anv_queue_submit_add_fence_bo(submit, point->bo, true);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   result = anv_queue_execbuf_locked(queue, submit);
+
+   if (result == VK_SUCCESS) {
+      /* Update the pending values in the timeline objects. */
+      for (uint32_t i = 0; i < submit->signal_timeline_count; i++) {
+         struct anv_timeline *timeline = submit->signal_timelines[i];
+         uint64_t signal_value = submit->signal_timeline_values[i];
+
+         assert(signal_value > timeline->highest_pending);
+         timeline->highest_pending = signal_value;
+      }
+
        /* Update signaled semaphores backed by syncfd. */
        for (uint32_t i = 0; i < submit->sync_fd_semaphore_count; i++) {
           struct anv_semaphore *semaphore = submit->sync_fd_semaphores[i];
@@ -119,26 +302,247 @@ _anv_queue_submit(struct anv_queue *queue, struct anv_queue_submit **_submit)
           struct anv_semaphore_impl *impl = &semaphore->permanent;
  
           assert(impl->type == ANV_SEMAPHORE_TYPE_SYNC_FILE);
-         impl->fd = dup(submit->out_fence);
+         impl->fd = os_dupfd_cloexec(submit->out_fence);
+      }
+   } else {
+      /* Unblock any waiter by signaling the points; the application will get
+       * a device lost error code.
+       */
+      for (uint32_t i = 0; i < submit->signal_timeline_count; i++) {
+         struct anv_timeline *timeline = submit->signal_timelines[i];
+         uint64_t signal_value = submit->signal_timeline_values[i];
+
+         assert(signal_value > timeline->highest_pending);
+         timeline->highest_past = timeline->highest_pending = signal_value;
        }
     }
  
     return result;
  }
  
+/* Flushes as many queued submissions as possible, in queue order.
+ *
+ * Each submission that is ready is removed from queue->queued_submits,
+ * submitted and then freed, and *advance is incremented for it. Processing
+ * stops at the first submission that is waiting on a point that has not
+ * materialized yet, or at the first submission that fails.
+ *
+ * Returns VK_SUCCESS, or the error of the failing submission.
+ */
+static VkResult
+anv_queue_submit_deferred_locked(struct anv_queue *queue, uint32_t *advance)
+{
+   list_for_each_entry_safe(struct anv_queue_submit, submit,
+                            &queue->queued_submits, link) {
+      /* Later submissions must not overtake one that is still blocked. */
+      if (!anv_queue_submit_ready_locked(submit))
+         return VK_SUCCESS;
+
+      (*advance)++;
+      list_del(&submit->link);
+
+      const VkResult status = anv_queue_submit_timeline_locked(queue, submit);
+
+      /* The submission is consumed whether or not it succeeded. */
+      anv_queue_submit_free(queue->device, submit);
+
+      if (status != VK_SUCCESS)
+         return status;
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Flushes the deferred submissions of the device's queue and returns the
+ * result of anv_queue_submit_deferred_locked().
+ */
+static VkResult
+anv_device_submit_deferred_locked(struct anv_device *device)
+{
+   struct anv_queue *queue = &device->queue;
+   uint32_t submitted = 0; /* required by the callee, not used here */
+
+   return anv_queue_submit_deferred_locked(queue, &submitted);
+}
+
+/* Signals, through anv_gem_syncobj_timeline_signal(), every fence of the
+ * submission that carries the I915_EXEC_FENCE_SIGNAL flag, using the value
+ * recorded for it in submit->fence_values. Other fences are left untouched.
+ */
+static void
+anv_queue_submit_signal_fences(struct anv_device *device,
+                               struct anv_queue_submit *submit)
+{
+   for (uint32_t f = 0; f < submit->fence_count; f++) {
+      struct drm_i915_gem_exec_fence *fence = &submit->fences[f];
+
+      if (!(fence->flags & I915_EXEC_FENCE_SIGNAL))
+         continue;
+
+      anv_gem_syncobj_timeline_signal(device, &fence->handle,
+                                      &submit->fence_values[f], 1);
+   }
+}
+
+/* Body of the per-queue submission thread created in anv_queue_init().
+ *
+ * Pops submissions off queue->queued_submits one at a time and, with
+ * queue->mutex dropped, waits for their timeline dependencies to
+ * materialize, runs the execbuf and frees the submission. When the list is
+ * empty the thread sleeps on queue->cond; it exits once queue->quit is set.
+ *
+ * _queue is the struct anv_queue the thread serves. Always returns NULL.
+ */
+static void *
+anv_queue_task(void *_queue)
+{
+   struct anv_queue *queue = _queue;
+
+   pthread_mutex_lock(&queue->mutex);
+
+   while (!queue->quit) {
+      while (!list_is_empty(&queue->queued_submits)) {
+         struct anv_queue_submit *submit =
+            list_first_entry(&queue->queued_submits, struct anv_queue_submit, link);
+         list_del(&submit->link);
+
+         /* The submission is ours now; don't hold the queue lock while
+          * blocking in the kernel.
+          */
+         pthread_mutex_unlock(&queue->mutex);
+
+         VkResult result = VK_ERROR_DEVICE_LOST;
+
+         /* Wait for timeline points to materialize before submitting. We need
+          * to do this because we're using threads to do the submit to i915.
+          * We could end up in a situation where the application submits to 2
+          * queues with the first submit creating the dma-fence for the
+          * second. But because the scheduling of the submission threads might
+          * wakeup the second queue thread first, this would make that execbuf
+          * fail because the dma-fence it depends on hasn't materialized yet.
+          */
+         if (!queue->lost && submit->wait_timeline_count > 0) {
+            int ret = queue->device->no_hw ? 0 :
+               anv_gem_syncobj_timeline_wait(
+                  queue->device, submit->wait_timeline_syncobjs,
+                  submit->wait_timeline_values, submit->wait_timeline_count,
+                  anv_get_absolute_timeout(UINT64_MAX) /* wait forever */,
+                  true /* wait for all */, true /* wait for materialize */);
+            if (ret) {
+               result = anv_queue_set_lost(queue, "timeline timeout: %s",
+                                           strerror(errno));
+            }
+         }
+
+         /* Now submit */
+         if (!queue->lost) {
+            pthread_mutex_lock(&queue->device->mutex);
+            result = anv_queue_execbuf_locked(queue, submit);
+            pthread_mutex_unlock(&queue->device->mutex);
+         }
+
+         for (uint32_t i = 0; i < submit->sync_fd_semaphore_count; i++) {
+            struct anv_semaphore *semaphore = submit->sync_fd_semaphores[i];
+            /* Out fences can't have temporary state because that would imply
+             * that we imported a sync file and are trying to signal it.
+             */
+            assert(semaphore->temporary.type == ANV_SEMAPHORE_TYPE_NONE);
+            struct anv_semaphore_impl *impl = &semaphore->permanent;
+
+            assert(impl->type == ANV_SEMAPHORE_TYPE_SYNC_FILE);
+            /* Duplicate with close-on-exec, like the non-threaded submission
+             * path does, so the sync file is not inherited by child
+             * processes.
+             */
+            impl->fd = os_dupfd_cloexec(submit->out_fence);
+         }
+
+         if (result != VK_SUCCESS) {
+            /* vkQueueSubmit or some other entry point will report the
+             * DEVICE_LOST error at some point, but until we have emptied our
+             * list of execbufs we need to wake up all the potential waiters
+             * until one of them spots the error.
+             */
+            anv_queue_submit_signal_fences(queue->device, submit);
+         }
+
+         anv_queue_submit_free(queue->device, submit);
+
+         pthread_mutex_lock(&queue->mutex);
+      }
+
+      /* Nothing left to submit: sleep until woken through queue->cond. */
+      if (!queue->quit)
+         pthread_cond_wait(&queue->cond, &queue->mutex);
+   }
+
+   pthread_mutex_unlock(&queue->mutex);
+
+   return NULL;
+}
+
+/* Queues a submission on the given queue, taking ownership of it.
+ *
+ * *_submit is reset to NULL. With a submission thread the submission is
+ * appended to queue->queued_submits and VK_SUCCESS is returned right away.
+ * Without one, the submission is appended and the deferred submissions are
+ * flushed under the device mutex; when flush_queue is true the call also
+ * keeps waiting on device->queue_submit until the queue has drained or an
+ * error occurs.
+ */
+static VkResult
+_anv_queue_submit(struct anv_queue *queue, struct anv_queue_submit **_submit,
+                  bool flush_queue)
+{
+   struct anv_device *device = queue->device;
+   struct anv_queue_submit *submit = *_submit;
+
+   /* Wait before signal behavior means the anv_queue_submit object might be
+    * kept alive a bit longer, so its ownership moves to the anv_queue.
+    */
+   *_submit = NULL;
+
+   if (device->has_thread_submit) {
+      pthread_mutex_lock(&queue->mutex);
+      pthread_cond_broadcast(&queue->cond);
+      list_addtail(&submit->link, &queue->queued_submits);
+      pthread_mutex_unlock(&queue->mutex);
+      return VK_SUCCESS;
+   }
+
+   pthread_mutex_lock(&device->mutex);
+   list_addtail(&submit->link, &queue->queued_submits);
+
+   VkResult result = anv_device_submit_deferred_locked(device);
+   while (flush_queue && result == VK_SUCCESS &&
+          !list_is_empty(&queue->queued_submits)) {
+      if (pthread_cond_wait(&device->queue_submit, &device->mutex) != 0) {
+         result = anv_device_set_lost(device, "wait timeout");
+         break;
+      }
+
+      result = anv_device_submit_deferred_locked(device);
+   }
+
+   pthread_mutex_unlock(&device->mutex);
+   return result;
+}
+
  VkResult
  anv_queue_init(struct anv_device *device, struct anv_queue *queue)
  {
-   queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
+   VkResult result;
+
     queue->device = device;
     queue->flags = 0;
+   queue->lost = false;
+   queue->quit = false;
+
+   list_inithead(&queue->queued_submits);
+
+   /* We only need the additional thread, mutex and condition variable when
+    * using a thread for submission.
+    */
+   if (device->has_thread_submit) {
+      if (pthread_mutex_init(&queue->mutex, NULL) != 0)
+         return vk_error(VK_ERROR_INITIALIZATION_FAILED);
+
+      if (pthread_cond_init(&queue->cond, NULL) != 0) {
+         result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
+         goto fail_mutex;
+      }
+      if (pthread_create(&queue->thread, NULL, anv_queue_task, queue)) {
+         result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
+         goto fail_cond;
+      }
+   }
+
+   vk_object_base_init(&device->vk, &queue->base, VK_OBJECT_TYPE_QUEUE);
  
     return VK_SUCCESS;
+
+ fail_cond:
+   pthread_cond_destroy(&queue->cond);
+ fail_mutex:
+   pthread_mutex_destroy(&queue->mutex);
+
+   return result;
  }
  
  void
  anv_queue_finish(struct anv_queue *queue)
  {
+   vk_object_base_finish(&queue->base);
+
+   if (!queue->device->has_thread_submit)
+      return;
+
+   pthread_mutex_lock(&queue->mutex);
+   pthread_cond_broadcast(&queue->cond);
+   queue->quit = true;
+   pthread_mutex_unlock(&queue->mutex);
+
+   void *ret;
+   pthread_join(queue->thread, &ret);
+
+   pthread_cond_destroy(&queue->cond);
+   pthread_mutex_destroy(&queue->mutex);
  }
  
  static VkResult
@@ -148,14 +552,14 @@ anv_queue_submit_add_fence_bo(struct anv_queue_submit *submit,
  {
     if (submit->fence_bo_count >= submit->fence_bo_array_length) {
        uint32_t new_len = MAX2(submit->fence_bo_array_length * 2, 64);
-
-      submit->fence_bos =
+      uintptr_t *new_fence_bos =
           vk_realloc(submit->alloc,
                      submit->fence_bos, new_len * sizeof(*submit->fence_bos),
                      8, submit->alloc_scope);
-      if (submit->fence_bos == NULL)
+      if (new_fence_bos == NULL)
           return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  
+      submit->fence_bos = new_fence_bos;
        submit->fence_bo_array_length = new_len;
     }
  
@@ -170,27 +574,70 @@ anv_queue_submit_add_fence_bo(struct anv_queue_submit *submit,
  static VkResult
  anv_queue_submit_add_syncobj(struct anv_queue_submit* submit,
                               struct anv_device *device,
-                             uint32_t handle, uint32_t flags)
+                             uint32_t handle, uint32_t flags,
+                             uint64_t value)
  {
     assert(flags != 0);
  
+   if (device->has_thread_submit && (flags & I915_EXEC_FENCE_WAIT)) {
+      if (submit->wait_timeline_count >= submit->wait_timeline_array_length) {
+         uint32_t new_len = MAX2(submit->wait_timeline_array_length * 2, 64);
+
+         uint32_t *new_wait_timeline_syncobjs =
+            vk_realloc(submit->alloc,
+                       submit->wait_timeline_syncobjs,
+                       new_len * sizeof(*submit->wait_timeline_syncobjs),
+                       8, submit->alloc_scope);
+         if (new_wait_timeline_syncobjs == NULL)
+            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+         submit->wait_timeline_syncobjs = new_wait_timeline_syncobjs;
+
+         uint64_t *new_wait_timeline_values =
+            vk_realloc(submit->alloc,
+                       submit->wait_timeline_values, new_len * sizeof(*submit->wait_timeline_values),
+                       8, submit->alloc_scope);
+         if (new_wait_timeline_values == NULL)
+            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+         submit->wait_timeline_values = new_wait_timeline_values;
+         submit->wait_timeline_array_length = new_len;
+      }
+
+      submit->wait_timeline_syncobjs[submit->wait_timeline_count] = handle;
+      submit->wait_timeline_values[submit->wait_timeline_count] = value;
+
+      submit->wait_timeline_count++;
+   }
+
     if (submit->fence_count >= submit->fence_array_length) {
        uint32_t new_len = MAX2(submit->fence_array_length * 2, 64);
-
-      submit->fences =
+      struct drm_i915_gem_exec_fence *new_fences =
           vk_realloc(submit->alloc,
                      submit->fences, new_len * sizeof(*submit->fences),
                      8, submit->alloc_scope);
-      if (submit->fences == NULL)
+      if (new_fences == NULL)
           return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  
+      submit->fences = new_fences;
+
+      uint64_t *new_fence_values =
+         vk_realloc(submit->alloc,
+                    submit->fence_values, new_len * sizeof(*submit->fence_values),
+                    8, submit->alloc_scope);
+      if (new_fence_values == NULL)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+      submit->fence_values = new_fence_values;
        submit->fence_array_length = new_len;
     }
  
-   submit->fences[submit->fence_count++] = (struct drm_i915_gem_exec_fence) {
+   submit->fences[submit->fence_count] = (struct drm_i915_gem_exec_fence) {
        .handle = handle,
        .flags = flags,
     };
+   submit->fence_values[submit->fence_count] = value;
+   submit->fence_count++;
  
     return VK_SUCCESS;
  }
@@ -218,11 +665,87 @@ anv_queue_submit_add_sync_fd_fence(struct anv_queue_submit *submit,
     return VK_SUCCESS;
  }
  
+/* Records that the submission waits for `value` on `timeline`.
+ *
+ * The (timeline, value) pair is appended to submit->wait_timelines and
+ * submit->wait_timeline_values, growing both arrays when they are full.
+ *
+ * Returns VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY if an array could not
+ * be grown.
+ */
+static VkResult
+anv_queue_submit_add_timeline_wait(struct anv_queue_submit* submit,
+                                   struct anv_device *device,
+                                   struct anv_timeline *timeline,
+                                   uint64_t value)
+{
+   const uint32_t idx = submit->wait_timeline_count;
+
+   if (idx >= submit->wait_timeline_array_length) {
+      const uint32_t grown_len =
+         MAX2(submit->wait_timeline_array_length * 2, 64);
+
+      struct anv_timeline **timelines =
+         vk_realloc(submit->alloc, submit->wait_timelines,
+                    grown_len * sizeof(*submit->wait_timelines),
+                    8, submit->alloc_scope);
+      if (timelines == NULL)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      submit->wait_timelines = timelines;
+
+      uint64_t *values =
+         vk_realloc(submit->alloc, submit->wait_timeline_values,
+                    grown_len * sizeof(*submit->wait_timeline_values),
+                    8, submit->alloc_scope);
+      if (values == NULL)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      submit->wait_timeline_values = values;
+
+      /* The capacity is only bumped once both arrays have grown. */
+      submit->wait_timeline_array_length = grown_len;
+   }
+
+   submit->wait_timelines[idx] = timeline;
+   submit->wait_timeline_values[idx] = value;
+   submit->wait_timeline_count = idx + 1;
+
+   return VK_SUCCESS;
+}
+
+/* Records that the submission signals `value` on `timeline`.
+ *
+ * `value` is asserted to be greater than timeline->highest_pending. The
+ * (timeline, value) pair is appended to submit->signal_timelines and
+ * submit->signal_timeline_values, growing both arrays when they are full.
+ *
+ * Returns VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY if an array could not
+ * be grown.
+ */
+static VkResult
+anv_queue_submit_add_timeline_signal(struct anv_queue_submit* submit,
+                                     struct anv_device *device,
+                                     struct anv_timeline *timeline,
+                                     uint64_t value)
+{
+   assert(timeline->highest_pending < value);
+
+   const uint32_t idx = submit->signal_timeline_count;
+
+   if (idx >= submit->signal_timeline_array_length) {
+      const uint32_t grown_len =
+         MAX2(submit->signal_timeline_array_length * 2, 64);
+
+      struct anv_timeline **timelines =
+         vk_realloc(submit->alloc, submit->signal_timelines,
+                    grown_len * sizeof(*submit->signal_timelines),
+                    8, submit->alloc_scope);
+      if (timelines == NULL)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      submit->signal_timelines = timelines;
+
+      uint64_t *values =
+         vk_realloc(submit->alloc, submit->signal_timeline_values,
+                    grown_len * sizeof(*submit->signal_timeline_values),
+                    8, submit->alloc_scope);
+      if (values == NULL)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      submit->signal_timeline_values = values;
+
+      /* The capacity is only bumped once both arrays have grown. */
+      submit->signal_timeline_array_length = grown_len;
+   }
+
+   submit->signal_timelines[idx] = timeline;
+   submit->signal_timeline_values[idx] = value;
+   submit->signal_timeline_count = idx + 1;
+
+   return VK_SUCCESS;
+}
+
  static struct anv_queue_submit *
-anv_queue_submit_alloc(struct anv_device *device)
+anv_queue_submit_alloc(struct anv_device *device, int perf_query_pass)
  {
-   const VkAllocationCallbacks *alloc = &device->alloc;
-   VkSystemAllocationScope alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_COMMAND;
+   const VkAllocationCallbacks *alloc = &device->vk.alloc;
+   VkSystemAllocationScope alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE;
  
     struct anv_queue_submit *submit = vk_zalloc(alloc, sizeof(*submit), 8, alloc_scope);
     if (!submit)
@@ -232,6 +755,7 @@ anv_queue_submit_alloc(struct anv_device *device)
     submit->alloc_scope = alloc_scope;
     submit->in_fence = -1;
     submit->out_fence = -1;
+   submit->perf_query_pass = perf_query_pass;
  
     return submit;
  }
@@ -240,12 +764,15 @@ VkResult
  anv_queue_submit_simple_batch(struct anv_queue *queue,
                                struct anv_batch *batch)
  {
+   if (queue->device->no_hw)
+      return VK_SUCCESS;
+
     struct anv_device *device = queue->device;
-   struct anv_queue_submit *submit = anv_queue_submit_alloc(device);
+   struct anv_queue_submit *submit = anv_queue_submit_alloc(device, -1);
     if (!submit)
        return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  
-   bool has_syncobj_wait = device->instance->physicalDevice.has_syncobj_wait;
+   bool has_syncobj_wait = device->physical->has_syncobj_wait;
     VkResult result;
     uint32_t syncobj;
     struct anv_bo *batch_bo, *sync_bo;
@@ -258,11 +785,12 @@ anv_queue_submit_simple_batch(struct anv_queue *queue,
        }
  
        result = anv_queue_submit_add_syncobj(submit, device, syncobj,
-                                            I915_EXEC_FENCE_SIGNAL);
+                                            I915_EXEC_FENCE_SIGNAL, 0);
     } else {
        result = anv_device_alloc_bo(device, 4096,
                                     ANV_BO_ALLOC_EXTERNAL |
                                     ANV_BO_ALLOC_IMPLICIT_SYNC,
+                                   0 /* explicit_address */,
                                     &sync_bo);
        if (result != VK_SUCCESS)
           goto err_free_submit;
@@ -287,7 +815,7 @@ anv_queue_submit_simple_batch(struct anv_queue *queue,
        submit->simple_bo_size = size;
     }
  
-   result = _anv_queue_submit(queue, &submit);
+   result = _anv_queue_submit(queue, &submit, true);
  
     if (result == VK_SUCCESS) {
        if (has_syncobj_wait) {
@@ -338,6 +866,9 @@ maybe_transfer_temporary_semaphore(struct anv_queue_submit *submit,
        return VK_SUCCESS;
     }
  
+   /* BO backed timeline semaphores cannot be temporary. */
+   assert(impl->type != ANV_SEMAPHORE_TYPE_TIMELINE);
+
     /*
      * There is a requirement to reset semaphore to their permanent state after
      * submission. From the Vulkan 1.0.53 spec:
@@ -382,22 +913,25 @@ static VkResult
  anv_queue_submit(struct anv_queue *queue,
                   struct anv_cmd_buffer *cmd_buffer,
                   const VkSemaphore *in_semaphores,
+                 const uint64_t *in_values,
                   uint32_t num_in_semaphores,
                   const VkSemaphore *out_semaphores,
+                 const uint64_t *out_values,
                   uint32_t num_out_semaphores,
-                 VkFence _fence)
+                 struct anv_bo *wsi_signal_bo,
+                 VkFence _fence,
+                 int perf_query_pass)
  {
     ANV_FROM_HANDLE(anv_fence, fence, _fence);
     struct anv_device *device = queue->device;
-   UNUSED struct anv_physical_device *pdevice = &device->instance->physicalDevice;
-   struct anv_queue_submit *submit = anv_queue_submit_alloc(device);
+   UNUSED struct anv_physical_device *pdevice = device->physical;
+   struct anv_queue_submit *submit = anv_queue_submit_alloc(device, perf_query_pass);
     if (!submit)
        return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  
     submit->cmd_buffer = cmd_buffer;
  
     VkResult result = VK_SUCCESS;
-
     for (uint32_t i = 0; i < num_in_semaphores; i++) {
        ANV_FROM_HANDLE(anv_semaphore, semaphore, in_semaphores[i]);
        struct anv_semaphore_impl *impl;
@@ -414,6 +948,18 @@ anv_queue_submit(struct anv_queue *queue,
              goto error;
           break;
  
+      case ANV_SEMAPHORE_TYPE_WSI_BO:
+         /* When using a window-system buffer as a semaphore, always enable
+          * EXEC_OBJECT_WRITE.  This gives us a WaR hazard with the display or
+          * compositor's read of the buffer and enforces that we don't start
+          * rendering until they are finished.  This is exactly the
+          * synchronization we want with vkAcquireNextImage.
+          */
+         result = anv_queue_submit_add_fence_bo(submit, impl->bo, true /* signal */);
+         if (result != VK_SUCCESS)
+            goto error;
+         break;
+
        case ANV_SEMAPHORE_TYPE_SYNC_FILE:
           assert(!pdevice->has_syncobj);
           if (submit->in_fence == -1) {
@@ -439,12 +985,30 @@ anv_queue_submit(struct anv_queue *queue,
        case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ: {
           result = anv_queue_submit_add_syncobj(submit, device,
                                                 impl->syncobj,
-                                               I915_EXEC_FENCE_WAIT);
+                                               I915_EXEC_FENCE_WAIT,
+                                               0);
           if (result != VK_SUCCESS)
              goto error;
           break;
        }
  
+      case ANV_SEMAPHORE_TYPE_TIMELINE:
+         result = anv_queue_submit_add_timeline_wait(submit, device,
+                                                     &impl->timeline,
+                                                     in_values ? in_values[i] : 0);
+         if (result != VK_SUCCESS)
+            goto error;
+         break;
+
+      case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
+         result = anv_queue_submit_add_syncobj(submit, device,
+                                               impl->syncobj,
+                                               I915_EXEC_FENCE_WAIT,
+                                               in_values ? in_values[i] : 0);
+         if (result != VK_SUCCESS)
+            goto error;
+         break;
+
        default:
           break;
        }
@@ -485,17 +1049,40 @@ anv_queue_submit(struct anv_queue *queue,
  
        case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ: {
           result = anv_queue_submit_add_syncobj(submit, device, impl->syncobj,
-                                               I915_EXEC_FENCE_SIGNAL);
+                                               I915_EXEC_FENCE_SIGNAL,
+                                               0);
           if (result != VK_SUCCESS)
              goto error;
           break;
        }
  
+      case ANV_SEMAPHORE_TYPE_TIMELINE:
+         result = anv_queue_submit_add_timeline_signal(submit, device,
+                                                       &impl->timeline,
+                                                       out_values ? out_values[i] : 0);
+         if (result != VK_SUCCESS)
+            goto error;
+         break;
+
+      case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
+         result = anv_queue_submit_add_syncobj(submit, device, impl->syncobj,
+                                               I915_EXEC_FENCE_SIGNAL,
+                                               out_values ? out_values[i] : 0);
+         if (result != VK_SUCCESS)
+            goto error;
+         break;
+
        default:
           break;
        }
     }
  
+   if (wsi_signal_bo) {
+      result = anv_queue_submit_add_fence_bo(submit, wsi_signal_bo, true /* signal */);
+      if (result != VK_SUCCESS)
+         goto error;
+   }
+
     if (fence) {
        /* Under most circumstances, out fences won't be temporary.  However,
         * the spec does allow it for opaque_fd.  From the Vulkan 1.0.53 spec:
@@ -514,6 +1101,7 @@ anv_queue_submit(struct anv_queue *queue,
  
        switch (impl->type) {
        case ANV_FENCE_TYPE_BO:
+         assert(!device->has_thread_submit);
           result = anv_queue_submit_add_fence_bo(submit, impl->bo.bo, true /* signal */);
           if (result != VK_SUCCESS)
              goto error;
@@ -525,8 +1113,11 @@ anv_queue_submit(struct anv_queue *queue,
            * also reset the fence's syncobj so that they don't contain a
            * signaled dma-fence.
            */
+         anv_gem_syncobj_reset(device, impl->syncobj);
+
           result = anv_queue_submit_add_syncobj(submit, device, impl->syncobj,
-                                               I915_EXEC_FENCE_SIGNAL);
+                                               I915_EXEC_FENCE_SIGNAL,
+                                               0);
           if (result != VK_SUCCESS)
              goto error;
           break;
@@ -537,12 +1128,22 @@ anv_queue_submit(struct anv_queue *queue,
        }
     }
  
-   result = _anv_queue_submit(queue, &submit);
+   result = _anv_queue_submit(queue, &submit, false);
     if (result != VK_SUCCESS)
        goto error;
  
     if (fence && fence->permanent.type == ANV_FENCE_TYPE_BO) {
-      /* BO fences can't be shared, so they can't be temporary. */
+      assert(!device->has_thread_submit);
+      /* If we have permanent BO fence, the only type of temporary possible
+       * would be BO_WSI (because BO fences are not shareable). The Vulkan spec
+       * also requires that the fence passed to vkQueueSubmit() be :
+       *
+       *    * unsignaled
+       *    * not be associated with any other queue command that has not yet
+       *      completed execution on that queue
+       *
+       * So the only acceptable type for the temporary is NONE.
+       */
        assert(fence->temporary.type == ANV_FENCE_TYPE_NONE);
  
        /* Once the execbuf has returned, we need to set the fence state to
@@ -573,6 +1174,9 @@ VkResult anv_QueueSubmit(
  {
     ANV_FROM_HANDLE(anv_queue, queue, _queue);
  
+   if (queue->device->no_hw)
+      return VK_SUCCESS;
+
     /* Query for device status prior to submitting.  Technically, we don't need
      * to do this.  However, if we have a client that's submitting piles of
      * garbage, we would rather break as early as possible to keep the GPU
@@ -590,7 +1194,8 @@ VkResult anv_QueueSubmit(
         * come up with something more efficient but this shouldn't be a
         * common case.
         */
-      result = anv_queue_submit(queue, NULL, NULL, 0, NULL, 0, fence);
+      result = anv_queue_submit(queue, NULL, NULL, NULL, 0, NULL, NULL, 0,
+                                NULL, fence, -1);
        goto out;
     }
  
@@ -598,6 +1203,26 @@ VkResult anv_QueueSubmit(
        /* Fence for this submit.  NULL for all but the last one */
        VkFence submit_fence = (i == submitCount - 1) ? fence : VK_NULL_HANDLE;
  
+      const struct wsi_memory_signal_submit_info *mem_signal_info =
+         vk_find_struct_const(pSubmits[i].pNext,
+                              WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA);
+      struct anv_bo *wsi_signal_bo =
+         mem_signal_info && mem_signal_info->memory != VK_NULL_HANDLE ?
+         anv_device_memory_from_handle(mem_signal_info->memory)->bo : NULL;
+
+      const VkTimelineSemaphoreSubmitInfoKHR *timeline_info =
+         vk_find_struct_const(pSubmits[i].pNext,
+                              TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR);
+      const VkPerformanceQuerySubmitInfoKHR *perf_info =
+         vk_find_struct_const(pSubmits[i].pNext,
+                              PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
+      const uint64_t *wait_values =
+         timeline_info && timeline_info->waitSemaphoreValueCount ?
+         timeline_info->pWaitSemaphoreValues : NULL;
+      const uint64_t *signal_values =
+         timeline_info && timeline_info->signalSemaphoreValueCount ?
+         timeline_info->pSignalSemaphoreValues : NULL;
+
        if (pSubmits[i].commandBufferCount == 0) {
           /* If we don't have any command buffers, we need to submit a dummy
            * batch to give GEM something to wait on.  We could, potentially,
@@ -606,10 +1231,14 @@ VkResult anv_QueueSubmit(
            */
           result = anv_queue_submit(queue, NULL,
                                     pSubmits[i].pWaitSemaphores,
+                                   wait_values,
                                     pSubmits[i].waitSemaphoreCount,
                                     pSubmits[i].pSignalSemaphores,
+                                   signal_values,
                                     pSubmits[i].signalSemaphoreCount,
-                                   submit_fence);
+                                   wsi_signal_bo,
+                                   submit_fence,
+                                   -1);
           if (result != VK_SUCCESS)
              goto out;
  
@@ -628,23 +1257,27 @@ VkResult anv_QueueSubmit(
              submit_fence : VK_NULL_HANDLE;
  
           const VkSemaphore *in_semaphores = NULL, *out_semaphores = NULL;
+         const uint64_t *in_values = NULL, *out_values = NULL;
           uint32_t num_in_semaphores = 0, num_out_semaphores = 0;
           if (j == 0) {
              /* Only the first batch gets the in semaphores */
              in_semaphores = pSubmits[i].pWaitSemaphores;
+            in_values = wait_values;
              num_in_semaphores = pSubmits[i].waitSemaphoreCount;
           }
  
           if (j == pSubmits[i].commandBufferCount - 1) {
              /* Only the last batch gets the out semaphores */
              out_semaphores = pSubmits[i].pSignalSemaphores;
+            out_values = signal_values;
              num_out_semaphores = pSubmits[i].signalSemaphoreCount;
           }
  
           result = anv_queue_submit(queue, cmd_buffer,
-                                   in_semaphores, num_in_semaphores,
-                                   out_semaphores, num_out_semaphores,
-                                   execbuf_fence);
+                                   in_semaphores, in_values, num_in_semaphores,
+                                   out_semaphores, out_values, num_out_semaphores,
+                                   wsi_signal_bo, execbuf_fence,
+                                   perf_info ? perf_info->counterPassIndex : 0);
           if (result != VK_SUCCESS)
              goto out;
        }
@@ -696,12 +1329,14 @@ VkResult anv_CreateFence(
  
     assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);
  
-   fence = vk_zalloc2(&device->alloc, pAllocator, sizeof(*fence), 8,
+   fence = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*fence), 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
     if (fence == NULL)
        return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  
-   if (device->instance->physicalDevice.has_syncobj_wait) {
+   vk_object_base_init(&device->vk, &fence->base, VK_OBJECT_TYPE_FENCE);
+
+   if (device->physical->has_syncobj_wait) {
        fence->permanent.type = ANV_FENCE_TYPE_SYNCOBJ;
  
        uint32_t create_flags = 0;
@@ -744,6 +1379,10 @@ anv_fence_impl_cleanup(struct anv_device *device,
        anv_bo_pool_free(&device->batch_bo_pool, impl->bo.bo);
        break;
  
+   case ANV_FENCE_TYPE_WSI_BO:
+      anv_device_release_bo(device, impl->bo.bo);
+      break;
+
     case ANV_FENCE_TYPE_SYNCOBJ:
        anv_gem_syncobj_destroy(device, impl->syncobj);
        break;
@@ -759,6 +1398,16 @@ anv_fence_impl_cleanup(struct anv_device *device,
     impl->type = ANV_FENCE_TYPE_NONE;
  }
  
+/* Discard the temporary payload of a fence, if it currently has one, leaving
+ * only the permanent payload in place. A fence whose temporary type is
+ * ANV_FENCE_TYPE_NONE is left untouched.
+ */
+void
+anv_fence_reset_temporary(struct anv_device *device,
+                          struct anv_fence *fence)
+{
+   struct anv_fence_impl *temporary = &fence->temporary;
+
+   if (temporary->type != ANV_FENCE_TYPE_NONE)
+      anv_fence_impl_cleanup(device, temporary);
+}
+
  void anv_DestroyFence(
      VkDevice                                    _device,
      VkFence                                     _fence,
@@ -773,7 +1422,8 @@ void anv_DestroyFence(
     anv_fence_impl_cleanup(device, &fence->temporary);
     anv_fence_impl_cleanup(device, &fence->permanent);
  
-   vk_free2(&device->alloc, pAllocator, fence);
+   vk_object_base_finish(&fence->base);
+   vk_free2(&device->vk.alloc, pAllocator, fence);
  }
  
  VkResult anv_ResetFences(
@@ -793,8 +1443,7 @@ VkResult anv_ResetFences(
         *    first restored. The remaining operations described therefore
         *    operate on the restored payload.
         */
-      if (fence->temporary.type != ANV_FENCE_TYPE_NONE)
-         anv_fence_impl_cleanup(device, &fence->temporary);
+      anv_fence_reset_temporary(device, fence);
  
        struct anv_fence_impl *impl = &fence->permanent;
  
@@ -831,8 +1480,7 @@ VkResult anv_GetFenceStatus(
  
     switch (impl->type) {
     case ANV_FENCE_TYPE_BO:
-      /* BO fences don't support import/export */
-      assert(fence->temporary.type == ANV_FENCE_TYPE_NONE);
+   case ANV_FENCE_TYPE_WSI_BO:
        switch (impl->bo.state) {
        case ANV_BO_FENCE_STATE_RESET:
           /* If it hasn't even been sent off to the GPU yet, it's not ready */
@@ -856,16 +1504,34 @@ VkResult anv_GetFenceStatus(
        }
  
     case ANV_FENCE_TYPE_SYNCOBJ: {
-      int ret = anv_gem_syncobj_wait(device, &impl->syncobj, 1, 0, true);
-      if (ret == -1) {
-         if (errno == ETIME) {
-            return VK_NOT_READY;
+      if (device->has_thread_submit) {
+         uint64_t binary_value = 0;
+         int ret = anv_gem_syncobj_timeline_wait(device, &impl->syncobj,
+                                             &binary_value, 1, 0,
+                                             true /* wait_all */,
+                                             false /* wait_materialize */);
+         if (ret == -1) {
+            if (errno == ETIME) {
+               return VK_NOT_READY;
+            } else {
+               /* We don't know the real error. */
+               return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
+            }
           } else {
-            /* We don't know the real error. */
-            return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
+            return VK_SUCCESS;
           }
        } else {
-         return VK_SUCCESS;
+         int ret = anv_gem_syncobj_wait(device, &impl->syncobj, 1, 0, false);
+         if (ret == -1) {
+            if (errno == ETIME) {
+               return VK_NOT_READY;
+            } else {
+               /* We don't know the real error. */
+               return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
+            }
+         } else {
+            return VK_SUCCESS;
+         }
        }
     }
  
@@ -881,7 +1547,7 @@ anv_wait_for_syncobj_fences(struct anv_device *device,
                              bool waitAll,
                              uint64_t abs_timeout_ns)
  {
-   uint32_t *syncobjs = vk_zalloc(&device->alloc,
+   uint32_t *syncobjs = vk_zalloc(&device->vk.alloc,
                                    sizeof(*syncobjs) * fenceCount, 8,
                                    VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
     if (!syncobjs)
@@ -899,17 +1565,17 @@ anv_wait_for_syncobj_fences(struct anv_device *device,
        syncobjs[i] = impl->syncobj;
     }
  
+   int ret = 0;
     /* The gem_syncobj_wait ioctl may return early due to an inherent
-    * limitation in the way it computes timeouts.  Loop until we've actually
+    * limitation in the way it computes timeouts. Loop until we've actually
      * passed the timeout.
      */
-   int ret;
     do {
        ret = anv_gem_syncobj_wait(device, syncobjs, fenceCount,
                                   abs_timeout_ns, waitAll);
     } while (ret == -1 && errno == ETIME && anv_gettime_ns() < abs_timeout_ns);
  
-   vk_free(&device->alloc, syncobjs);
+   vk_free(&device->vk.alloc, syncobjs);
  
     if (ret == -1) {
        if (errno == ETIME) {
@@ -938,13 +1604,11 @@ anv_wait_for_bo_fences(struct anv_device *device,
        for (uint32_t i = 0; i < fenceCount; i++) {
           ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
  
-         /* This function assumes that all fences are BO fences and that they
-          * have no temporary state.  Since BO fences will never be exported,
-          * this should be a safe assumption.
-          */
-         assert(fence->permanent.type == ANV_FENCE_TYPE_BO);
-         assert(fence->temporary.type == ANV_FENCE_TYPE_NONE);
-         struct anv_fence_impl *impl = &fence->permanent;
+         struct anv_fence_impl *impl =
+            fence->temporary.type != ANV_FENCE_TYPE_NONE ?
+            &fence->temporary : &fence->permanent;
+         assert(impl->type == ANV_FENCE_TYPE_BO ||
+                impl->type == ANV_FENCE_TYPE_WSI_BO);
  
           switch (impl->bo.state) {
           case ANV_BO_FENCE_STATE_RESET:
@@ -1039,12 +1703,9 @@ done:
  
  static VkResult
  anv_wait_for_wsi_fence(struct anv_device *device,
-                       const VkFence _fence,
+                       struct anv_fence_impl *impl,
                         uint64_t abs_timeout)
  {
-   ANV_FROM_HANDLE(anv_fence, fence, _fence);
-   struct anv_fence_impl *impl = &fence->permanent;
-
     return impl->fence_wsi->wait(impl->fence_wsi, abs_timeout);
  }
  
@@ -1060,8 +1721,15 @@ anv_wait_for_fences(struct anv_device *device,
     if (fenceCount <= 1 || waitAll) {
        for (uint32_t i = 0; i < fenceCount; i++) {
           ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
-         switch (fence->permanent.type) {
+         struct anv_fence_impl *impl =
+            fence->temporary.type != ANV_FENCE_TYPE_NONE ?
+            &fence->temporary : &fence->permanent;
+
+         switch (impl->type) {
           case ANV_FENCE_TYPE_BO:
+            assert(!device->physical->has_syncobj_wait);
+            /* fall-through */
+         case ANV_FENCE_TYPE_WSI_BO:
              result = anv_wait_for_bo_fences(device, 1, &pFences[i],
                                              true, abs_timeout);
              break;
@@ -1070,7 +1738,7 @@ anv_wait_for_fences(struct anv_device *device,
                                                   true, abs_timeout);
              break;
           case ANV_FENCE_TYPE_WSI:
-            result = anv_wait_for_wsi_fence(device, pFences[i], abs_timeout);
+            result = anv_wait_for_wsi_fence(device, impl, abs_timeout);
              break;
           case ANV_FENCE_TYPE_NONE:
              result = VK_SUCCESS;
@@ -1095,7 +1763,10 @@ static bool anv_all_fences_syncobj(uint32_t fenceCount, const VkFence *pFences)
  {
     for (uint32_t i = 0; i < fenceCount; ++i) {
        ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
-      if (fence->permanent.type != ANV_FENCE_TYPE_SYNCOBJ)
+      struct anv_fence_impl *impl =
+         fence->temporary.type != ANV_FENCE_TYPE_NONE ?
+         &fence->temporary : &fence->permanent;
+      if (impl->type != ANV_FENCE_TYPE_SYNCOBJ)
           return false;
     }
     return true;
@@ -1105,7 +1776,11 @@ static bool anv_all_fences_bo(uint32_t fenceCount, const VkFence *pFences)
  {
     for (uint32_t i = 0; i < fenceCount; ++i) {
        ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
-      if (fence->permanent.type != ANV_FENCE_TYPE_BO)
+      struct anv_fence_impl *impl =
+         fence->temporary.type != ANV_FENCE_TYPE_NONE ?
+         &fence->temporary : &fence->permanent;
+      if (impl->type != ANV_FENCE_TYPE_BO &&
+          impl->type != ANV_FENCE_TYPE_WSI_BO)
           return false;
     }
     return true;
@@ -1120,6 +1795,9 @@ VkResult anv_WaitForFences(
  {
     ANV_FROM_HANDLE(anv_device, device, _device);
  
+   if (device->no_hw)
+      return VK_SUCCESS;
+
     if (anv_device_is_lost(device))
        return VK_ERROR_DEVICE_LOST;
  
@@ -1194,24 +1872,35 @@ VkResult anv_ImportFenceFdKHR(
  
        break;
  
-   case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
+   case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: {
        /* Sync files are a bit tricky.  Because we want to continue using the
         * syncobj implementation of WaitForFences, we don't use the sync file
         * directly but instead import it into a syncobj.
         */
        new_impl.type = ANV_FENCE_TYPE_SYNCOBJ;
  
-      new_impl.syncobj = anv_gem_syncobj_create(device, 0);
+      /* "If handleType is VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT, the
+       *  special value -1 for fd is treated like a valid sync file descriptor
+       *  referring to an object that has already signaled. The import
+       *  operation will succeed and the VkFence will have a temporarily
+       *  imported payload as if a valid file descriptor had been provided."
+       */
+      uint32_t create_flags = 0;
+      if (fd == -1)
+         create_flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
+
+      new_impl.syncobj = anv_gem_syncobj_create(device, create_flags);
        if (!new_impl.syncobj)
           return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  
-      if (anv_gem_syncobj_import_sync_file(device, new_impl.syncobj, fd)) {
+      if (fd != -1 &&
+          anv_gem_syncobj_import_sync_file(device, new_impl.syncobj, fd)) {
           anv_gem_syncobj_destroy(device, new_impl.syncobj);
-         return vk_errorf(device->instance, NULL,
-                          VK_ERROR_INVALID_EXTERNAL_HANDLE,
+         return vk_errorf(device, NULL, VK_ERROR_INVALID_EXTERNAL_HANDLE,
                            "syncobj sync file import failed: %m");
        }
        break;
+   }
  
     default:
        return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
@@ -1239,6 +1928,31 @@ VkResult anv_ImportFenceFdKHR(
     return VK_SUCCESS;
  }
  
+/* The sideband payload of the DRM syncobj was incremented when the
+ * application called vkQueueSubmit(). Wait here for a fence with the same
+ * value to materialize so that the syncobj can be exported (typically as a
+ * sync FD).
+ *
+ * Returns VK_SUCCESS immediately when the device does not use a submission
+ * thread. Otherwise waits with a 5 second timeout and marks the device lost
+ * if the wait fails. The fd parameter is not used by this function.
+ */
+static VkResult
+wait_syncobj_materialize(struct anv_device *device,
+                         uint32_t syncobj,
+                         int *fd)
+{
+   /* We might need to wait until the fence materializes before we can
+    * export to a sync FD, but only when a thread is used for submission.
+    */
+   if (device->has_thread_submit) {
+      uint64_t binary_value = 0;
+      int ret =
+         anv_gem_syncobj_timeline_wait(device, &syncobj, &binary_value, 1,
+                                       anv_get_absolute_timeout(5ull * NSEC_PER_SEC),
+                                       true /* wait_all */,
+                                       true /* wait_materialize */);
+      if (ret != 0)
+         return anv_device_set_lost(device, "anv_gem_syncobj_timeline_wait failed: %m");
+   }
+
+   return VK_SUCCESS;
+}
+
  VkResult anv_GetFenceFdKHR(
      VkDevice                                    _device,
      const VkFenceGetFdInfoKHR*                  pGetFdInfo,
@@ -1265,6 +1979,10 @@ VkResult anv_GetFenceFdKHR(
     }
  
     case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: {
+      VkResult result = wait_syncobj_materialize(device, impl->syncobj, pFd);
+      if (result != VK_SUCCESS)
+         return result;
+
        int fd = anv_gem_syncobj_export_sync_file(device, impl->syncobj);
        if (fd < 0)
           return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
@@ -1292,6 +2010,73 @@ VkResult anv_GetFenceFdKHR(
  
  // Queue semaphore functions
  
+/* Look up VkSemaphoreTypeCreateInfoKHR in a pNext chain.
+ *
+ * Returns the semaphoreType stored in that structure, or
+ * VK_SEMAPHORE_TYPE_BINARY_KHR when the chain does not contain one. When the
+ * structure is present and initial_value is non-NULL, *initial_value receives
+ * its initialValue; in every other case *initial_value is left untouched, so
+ * callers should initialize it themselves.
+ */
+static VkSemaphoreTypeKHR
+get_semaphore_type(const void *pNext, uint64_t *initial_value)
+{
+   const VkSemaphoreTypeCreateInfoKHR *info =
+      vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO_KHR);
+
+   if (info != NULL) {
+      if (initial_value != NULL)
+         *initial_value = info->initialValue;
+      return info->semaphoreType;
+   }
+
+   return VK_SEMAPHORE_TYPE_BINARY_KHR;
+}
+
+/* Set up the payload of a binary semaphore in impl.
+ *
+ * When the physical device reports syncobj support, the payload is a freshly
+ * created DRM syncobj; otherwise it is a 4096-byte BO allocated with the
+ * EXTERNAL and IMPLICIT_SYNC flags.
+ *
+ * The exportable parameter is currently not consulted.
+ *
+ * Returns VK_SUCCESS, VK_ERROR_OUT_OF_HOST_MEMORY if the syncobj could not be
+ * created, or the error reported by anv_device_alloc_bo().
+ */
+static VkResult
+binary_semaphore_create(struct anv_device *device,
+                        struct anv_semaphore_impl *impl,
+                        bool exportable)
+{
+   if (device->physical->has_syncobj) {
+      impl->type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
+      impl->syncobj = anv_gem_syncobj_create(device, 0);
+      if (!impl->syncobj)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      return VK_SUCCESS;
+   }
+
+   impl->type = ANV_SEMAPHORE_TYPE_BO;
+   VkResult result =
+      anv_device_alloc_bo(device, 4096,
+                          ANV_BO_ALLOC_EXTERNAL |
+                          ANV_BO_ALLOC_IMPLICIT_SYNC,
+                          0 /* explicit_address */,
+                          &impl->bo);
+   /* Bail out before looking at impl->bo: the check below dereferences it,
+    * which must only happen after a successful allocation.
+    */
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* If we're going to use this as a fence, we need to *not* have the
+    * EXEC_OBJECT_ASYNC bit set.
+    */
+   assert(!(impl->bo->flags & EXEC_OBJECT_ASYNC));
+   return VK_SUCCESS;
+}
+
+/* Set up the payload of a timeline semaphore in impl, starting at
+ * initial_value.
+ *
+ * With threaded submission the payload is a DRM syncobj, which is signaled to
+ * initial_value when that value is non-zero. Without it, the payload is a
+ * software anv_timeline initialized through anv_timeline_init().
+ *
+ * Returns VK_SUCCESS, VK_ERROR_OUT_OF_HOST_MEMORY if the syncobj could not be
+ * created or signaled, or whatever anv_timeline_init() reports.
+ */
+static VkResult
+timeline_semaphore_create(struct anv_device *device,
+                          struct anv_semaphore_impl *impl,
+                          uint64_t initial_value)
+{
+   if (!device->has_thread_submit) {
+      impl->type = ANV_SEMAPHORE_TYPE_TIMELINE;
+      /* Propagate the result instead of dropping it. */
+      return anv_timeline_init(device, &impl->timeline, initial_value);
+   }
+
+   impl->type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE;
+   impl->syncobj = anv_gem_syncobj_create(device, 0);
+   if (!impl->syncobj)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* The signal call is skipped entirely for an initial value of 0. */
+   if (initial_value &&
+       anv_gem_syncobj_timeline_signal(device, &impl->syncobj,
+                                       &initial_value, 1)) {
+      /* Don't leak the syncobj when it cannot be brought to its start value. */
+      anv_gem_syncobj_destroy(device, impl->syncobj);
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   return VK_SUCCESS;
+}
+
  VkResult anv_CreateSemaphore(
      VkDevice                                    _device,
      const VkSemaphoreCreateInfo*                pCreateInfo,
@@ -1303,61 +2088,60 @@ VkResult anv_CreateSemaphore(
  
     assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO);
  
-   semaphore = vk_alloc(&device->alloc, sizeof(*semaphore), 8,
+   uint64_t timeline_value = 0;
+   VkSemaphoreTypeKHR sem_type = get_semaphore_type(pCreateInfo->pNext, &timeline_value);
+
+   semaphore = vk_alloc(&device->vk.alloc, sizeof(*semaphore), 8,
                          VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
     if (semaphore == NULL)
        return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
  
+   vk_object_base_init(&device->vk, &semaphore->base, VK_OBJECT_TYPE_SEMAPHORE);
+
     p_atomic_set(&semaphore->refcount, 1);
  
     const VkExportSemaphoreCreateInfo *export =
        vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO);
-    VkExternalSemaphoreHandleTypeFlags handleTypes =
+   VkExternalSemaphoreHandleTypeFlags handleTypes =
        export ? export->handleTypes : 0;
+   VkResult result;
  
     if (handleTypes == 0) {
-      /* The DRM execbuffer ioctl always execute in-oder so long as you stay
-       * on the same ring.  Since we don't expose the blit engine as a DMA
-       * queue, a dummy no-op semaphore is a perfectly valid implementation.
-       */
-      semaphore->permanent.type = ANV_SEMAPHORE_TYPE_DUMMY;
+      if (sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR)
+         result = binary_semaphore_create(device, &semaphore->permanent, false);
+      else
+         result = timeline_semaphore_create(device, &semaphore->permanent, timeline_value);
+      if (result != VK_SUCCESS) {
+         vk_free2(&device->vk.alloc, pAllocator, semaphore);
+         return result;
+      }
     } else if (handleTypes & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
        assert(handleTypes == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
-      if (device->instance->physicalDevice.has_syncobj) {
-         semaphore->permanent.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
-         semaphore->permanent.syncobj = anv_gem_syncobj_create(device, 0);
-         if (!semaphore->permanent.syncobj) {
-            vk_free2(&device->alloc, pAllocator, semaphore);
-            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-         }
-      } else {
-         semaphore->permanent.type = ANV_SEMAPHORE_TYPE_BO;
-         VkResult result = anv_device_alloc_bo(device, 4096,
-                                               ANV_BO_ALLOC_EXTERNAL |
-                                               ANV_BO_ALLOC_IMPLICIT_SYNC,
-                                               &semaphore->permanent.bo);
-         if (result != VK_SUCCESS) {
-            vk_free2(&device->alloc, pAllocator, semaphore);
-            return result;
-         }
-
-         /* If we're going to use this as a fence, we need to *not* have the
-          * EXEC_OBJECT_ASYNC bit set.
-          */
-         assert(!(semaphore->permanent.bo->flags & EXEC_OBJECT_ASYNC));
+      if (sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR)
+         result = binary_semaphore_create(device, &semaphore->permanent, true);
+      else
+         result = timeline_semaphore_create(device, &semaphore->permanent, timeline_value);
+      if (result != VK_SUCCESS) {
+         vk_free2(&device->vk.alloc, pAllocator, semaphore);
+         return result;
        }
     } else if (handleTypes & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
        assert(handleTypes == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT);
-      if (device->instance->physicalDevice.has_syncobj) {
+      assert(sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR);
+      if (device->physical->has_syncobj) {
           semaphore->permanent.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
           semaphore->permanent.syncobj = anv_gem_syncobj_create(device, 0);
+         if (!semaphore->permanent.syncobj) {
+            vk_free2(&device->vk.alloc, pAllocator, semaphore);
+            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+         }
        } else {
           semaphore->permanent.type = ANV_SEMAPHORE_TYPE_SYNC_FILE;
           semaphore->permanent.fd = -1;
        }
     } else {
        assert(!"Unknown handle type");
-      vk_free2(&device->alloc, pAllocator, semaphore);
+      vk_free2(&device->vk.alloc, pAllocator, semaphore);
        return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
     }
  
@@ -1379,14 +2163,21 @@ anv_semaphore_impl_cleanup(struct anv_device *device,
        break;
  
     case ANV_SEMAPHORE_TYPE_BO:
+   case ANV_SEMAPHORE_TYPE_WSI_BO:
        anv_device_release_bo(device, impl->bo);
        break;
  
     case ANV_SEMAPHORE_TYPE_SYNC_FILE:
-      close(impl->fd);
+      if (impl->fd >= 0)
+         close(impl->fd);
+      break;
+
+   case ANV_SEMAPHORE_TYPE_TIMELINE:
+      anv_timeline_finish(device, &impl->timeline);
        break;
  
     case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
+   case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
        anv_gem_syncobj_destroy(device, impl->syncobj);
        break;
  
@@ -1423,7 +2214,9 @@ anv_semaphore_unref(struct anv_device *device, struct anv_semaphore *semaphore)
  
     anv_semaphore_impl_cleanup(device, &semaphore->temporary);
     anv_semaphore_impl_cleanup(device, &semaphore->permanent);
-   vk_free(&device->alloc, semaphore);
+
+   vk_object_base_finish(&semaphore->base);
+   vk_free(&device->vk.alloc, semaphore);
  }
  
  void anv_DestroySemaphore(
@@ -1447,8 +2240,16 @@ void anv_GetPhysicalDeviceExternalSemaphoreProperties(
  {
     ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
  
+   VkSemaphoreTypeKHR sem_type =
+      get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL);
+
     switch (pExternalSemaphoreInfo->handleType) {
     case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
+      /* Timeline semaphores are not exportable, unless we have threaded
+       * submission.
+       */
+      if (sem_type == VK_SEMAPHORE_TYPE_TIMELINE_KHR && !device->has_thread_submit)
+         break;
        pExternalSemaphoreProperties->exportFromImportedHandleTypes =
           VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
        pExternalSemaphoreProperties->compatibleHandleTypes =
@@ -1459,17 +2260,18 @@ void anv_GetPhysicalDeviceExternalSemaphoreProperties(
        return;
  
     case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
-      if (device->has_exec_fence) {
-         pExternalSemaphoreProperties->exportFromImportedHandleTypes =
-            VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
-         pExternalSemaphoreProperties->compatibleHandleTypes =
-            VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
-         pExternalSemaphoreProperties->externalSemaphoreFeatures =
-            VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
-            VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
-         return;
-      }
-      break;
+      if (sem_type == VK_SEMAPHORE_TYPE_TIMELINE_KHR)
+         break;
+      if (!device->has_exec_fence)
+         break;
+      pExternalSemaphoreProperties->exportFromImportedHandleTypes =
+         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
+      pExternalSemaphoreProperties->compatibleHandleTypes =
+         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
+      pExternalSemaphoreProperties->externalSemaphoreFeatures =
+         VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
+         VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
+      return;
  
     default:
        break;
@@ -1494,8 +2296,16 @@ VkResult anv_ImportSemaphoreFdKHR(
  
     switch (pImportSemaphoreFdInfo->handleType) {
     case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
-      if (device->instance->physicalDevice.has_syncobj) {
-         new_impl.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
+      if (device->physical->has_syncobj) {
+         /* When importing non temporarily, reuse the semaphore's existing
+          * type. The Linux/DRM implementation allows binary & timeline
+          * semaphores to be used interchangeably and we have no way to
+          * differentiate them.
+          */
+         if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT)
+            new_impl.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
+         else
+            new_impl.type = semaphore->permanent.type;
  
           new_impl.syncobj = anv_gem_syncobj_fd_to_handle(device, fd);
           if (!new_impl.syncobj)
@@ -1506,6 +2316,7 @@ VkResult anv_ImportSemaphoreFdKHR(
           VkResult result = anv_device_import_bo(device, fd,
                                                  ANV_BO_ALLOC_EXTERNAL |
                                                  ANV_BO_ALLOC_IMPLICIT_SYNC,
+                                                0 /* client_address */,
                                                  &new_impl.bo);
           if (result != VK_SUCCESS)
              return result;
@@ -1534,24 +2345,32 @@ VkResult anv_ImportSemaphoreFdKHR(
        break;
  
     case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
-      if (device->instance->physicalDevice.has_syncobj) {
+      if (device->physical->has_syncobj) {
+         uint32_t create_flags = 0;
+
+         if (fd == -1)
+            create_flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
+
           new_impl = (struct anv_semaphore_impl) {
              .type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ,
-            .syncobj = anv_gem_syncobj_create(device, 0),
+            .syncobj = anv_gem_syncobj_create(device, create_flags),
           };
+
           if (!new_impl.syncobj)
              return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-         if (anv_gem_syncobj_import_sync_file(device, new_impl.syncobj, fd)) {
-            anv_gem_syncobj_destroy(device, new_impl.syncobj);
-            return vk_errorf(device->instance, NULL,
-                             VK_ERROR_INVALID_EXTERNAL_HANDLE,
-                             "syncobj sync file import failed: %m");
+
+         if (fd != -1) {
+            if (anv_gem_syncobj_import_sync_file(device, new_impl.syncobj, fd)) {
+               anv_gem_syncobj_destroy(device, new_impl.syncobj);
+               return vk_errorf(device, NULL, VK_ERROR_INVALID_EXTERNAL_HANDLE,
+                                "syncobj sync file import failed: %m");
+            }
+            /* Ownership of the FD is transferred to Anv. Since we don't need it
+             * anymore because the associated fence has been put into a syncobj,
+             * we must close the FD.
+             */
+            close(fd);
           }
-         /* Ownership of the FD is transfered to Anv. Since we don't need it
-          * anymore because the associated fence has been put into a syncobj,
-          * we must close the FD.
-          */
-         close(fd);
        } else {
           new_impl = (struct anv_semaphore_impl) {
              .type = ANV_SEMAPHORE_TYPE_SYNC_FILE,
@@ -1640,9 +2459,13 @@ VkResult anv_GetSemaphoreFdKHR(
     }
  
     case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
-      if (pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT)
+      if (pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
+         VkResult result = wait_syncobj_materialize(device, impl->syncobj, pFd);
+         if (result != VK_SUCCESS)
+            return result;
+
           fd = anv_gem_syncobj_export_sync_file(device, impl->syncobj);
-      else {
+      } else {
           assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
           fd = anv_gem_syncobj_handle_to_fd(device, impl->syncobj);
        }
@@ -1651,6 +2474,14 @@ VkResult anv_GetSemaphoreFdKHR(
        *pFd = fd;
        break;
  
+   case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
+      assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
+      fd = anv_gem_syncobj_handle_to_fd(device, impl->syncobj);
+      if (fd < 0)
+         return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
+      *pFd = fd;
+      break;
+
     default:
        return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
     }
@@ -1667,3 +2498,263 @@ VkResult anv_GetSemaphoreFdKHR(
  
     return VK_SUCCESS;
  }
+
+/**
+ * anv implementation of vkGetSemaphoreCounterValue.
+ *
+ * Reads the current counter value of a timeline semaphore into *pValue.  The
+ * temporary payload is used when one is set (its type is not
+ * ANV_SEMAPHORE_TYPE_NONE); otherwise the permanent payload is used.
+ *
+ * Returns VK_SUCCESS on success.  For ANV_SEMAPHORE_TYPE_TIMELINE any error
+ * reported by anv_timeline_gc_locked() is returned to the caller; for
+ * ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE a failed kernel query marks the
+ * device lost and returns that result.  Any other payload type is invalid.
+ */
+VkResult anv_GetSemaphoreCounterValue(
+    VkDevice                                    _device,
+    VkSemaphore                                 _semaphore,
+    uint64_t*                                   pValue)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   ANV_FROM_HANDLE(anv_semaphore, semaphore, _semaphore);
+
+   struct anv_semaphore_impl *impl =
+      semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
+      &semaphore->temporary : &semaphore->permanent;
+
+   switch (impl->type) {
+   case ANV_SEMAPHORE_TYPE_TIMELINE: {
+      pthread_mutex_lock(&device->mutex);
+      /* Collect completed time points before sampling highest_past.  The
+       * collection can fail, so its result is handed back to the caller
+       * instead of being dropped.
+       */
+      VkResult result = anv_timeline_gc_locked(device, &impl->timeline);
+      *pValue = impl->timeline.highest_past;
+      pthread_mutex_unlock(&device->mutex);
+      return result;
+   }
+
+   case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE: {
+      /* Query a single syncobj; the value is written straight to pValue. */
+      int ret = anv_gem_syncobj_timeline_query(device, &impl->syncobj, pValue, 1);
+
+      if (ret != 0)
+         return anv_device_set_lost(device, "unable to query timeline syncobj");
+
+      return VK_SUCCESS;
+   }
+
+   default:
+      unreachable("Invalid semaphore type");
+   }
+}
+
+/**
+ * Wait until `timeline` has passed `serial` or `abs_timeout_ns` is reached.
+ *
+ * The caller must hold device->mutex.  The mutex is released while blocked
+ * (inside pthread_cond_timedwait() and around anv_device_wait()) and is held
+ * again when the function returns.
+ *
+ * Returns VK_SUCCESS once highest_past >= serial, VK_TIMEOUT when the
+ * deadline expires first, or whatever error anv_timeline_gc_locked() /
+ * anv_device_wait() report.
+ */
+static VkResult
+anv_timeline_wait_locked(struct anv_device *device,
+                         struct anv_timeline *timeline,
+                         uint64_t serial, uint64_t abs_timeout_ns)
+{
+   /* The deadline is absolute, so one conversion serves every wakeup of the
+    * first phase.
+    */
+   const struct timespec deadline = {
+      .tv_sec = abs_timeout_ns / NSEC_PER_SEC,
+      .tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
+   };
+
+   /* Phase 1: sleep on the queue_submit condition variable until a time
+    * point at least as high as serial is pending on the timeline.
+    */
+   while (timeline->highest_pending < serial) {
+      UNUSED int ret = pthread_cond_timedwait(&device->queue_submit,
+                                              &device->mutex, &deadline);
+      assert(ret != EINVAL);
+
+      /* Only give up if the deadline has passed AND the point is still not
+       * pending; a late wakeup that satisfies the wait is not a timeout.
+       */
+      if (anv_gettime_ns() >= abs_timeout_ns &&
+          timeline->highest_pending < serial)
+         return VK_TIMEOUT;
+   }
+
+   /* Phase 2: the point is pending; wait for it to actually complete. */
+   for (;;) {
+      VkResult status = anv_timeline_gc_locked(device, timeline);
+      if (status != VK_SUCCESS)
+         return status;
+
+      if (timeline->highest_past >= serial)
+         return VK_SUCCESS;
+
+      /* Still not there: the earliest time point has a busy BO. */
+      struct anv_timeline_point *oldest =
+         list_first_entry(&timeline->points,
+                          struct anv_timeline_point, link);
+
+      /* Release the mutex for the duration of the BO wait.  The waiting
+       * counter is raised first, presumably so the point is not recycled
+       * while unlocked -- confirm against anv_timeline_gc_locked().
+       */
+      oldest->waiting++;
+      pthread_mutex_unlock(&device->mutex);
+
+      status = anv_device_wait(device, oldest->bo,
+                               anv_get_relative_timeout(abs_timeout_ns));
+
+      /* Re-take the mutex before touching the point again. */
+      pthread_mutex_lock(&device->mutex);
+      oldest->waiting--;
+
+      /* Handles both VK_TIMEOUT and VK_ERROR_DEVICE_LOST. */
+      if (status != VK_SUCCESS)
+         return status;
+   }
+}
+
+/**
+ * Wait on a set of timelines, each for its own serial.
+ *
+ * device          Device owning the timelines; its mutex is taken here.
+ * timelines       Array of n_timelines timeline pointers.
+ * serials         Value to wait for on the timeline at the same index.
+ * n_timelines     Number of entries in both arrays.
+ * wait_all        true: every timeline must reach its serial.
+ *                 false: return as soon as any one of them does.
+ * abs_timeout_ns  Absolute deadline in nanoseconds.
+ *
+ * Returns VK_SUCCESS when the wait is satisfied, VK_TIMEOUT when the deadline
+ * passes first, or the error reported by anv_timeline_wait_locked().
+ */
+static VkResult
+anv_timelines_wait(struct anv_device *device,
+                   struct anv_timeline **timelines,
+                   const uint64_t *serials,
+                   uint32_t n_timelines,
+                   bool wait_all,
+                   uint64_t abs_timeout_ns)
+{
+   if (!wait_all && n_timelines > 1) {
+      /* Wait-any: poll each timeline with a zero timeout, then sleep briefly
+       * on the submit condition variable before polling again.
+       */
+      pthread_mutex_lock(&device->mutex);
+
+      while (1) {
+         VkResult result = VK_TIMEOUT;
+         for (uint32_t i = 0; i < n_timelines; i++) {
+            result =
+               anv_timeline_wait_locked(device, timelines[i], serials[i], 0);
+            if (result != VK_TIMEOUT)
+               break;
+         }
+
+         /* Success or a hard error on any timeline ends the wait. */
+         if (result != VK_TIMEOUT) {
+            pthread_mutex_unlock(&device->mutex);
+            return result;
+         }
+
+         /* Sample the clock once so the deadline check and the remaining
+          * time below agree with each other.
+          */
+         const uint64_t now_ns = anv_gettime_ns();
+         if (now_ns >= abs_timeout_ns) {
+            pthread_mutex_unlock(&device->mutex);
+            return result;
+         }
+
+         /* If none of them are ready do a short wait so we don't completely
+          * spin while holding the lock. The 10us is completely arbitrary.
+          *
+          * The wait is a tenth of the time left, capped at 10us.  The time
+          * left is deadline minus now; the subtraction cannot wrap because
+          * now_ns < abs_timeout_ns was just checked.
+          */
+         uint64_t abs_short_wait_ns =
+            anv_get_absolute_timeout(
+               MIN2((abs_timeout_ns - now_ns) / 10, 10 * 1000));
+         struct timespec abstime = {
+            .tv_sec = abs_short_wait_ns / NSEC_PER_SEC,
+            .tv_nsec = abs_short_wait_ns % NSEC_PER_SEC,
+         };
+         ASSERTED int ret;
+         ret = pthread_cond_timedwait(&device->queue_submit,
+                                      &device->mutex, &abstime);
+         assert(ret != EINVAL);
+      }
+   } else {
+      /* Wait-all (or a single timeline): wait on each in turn against the
+       * same absolute deadline and stop at the first failure.
+       */
+      VkResult result = VK_SUCCESS;
+      pthread_mutex_lock(&device->mutex);
+      for (uint32_t i = 0; i < n_timelines; i++) {
+         result =
+            anv_timeline_wait_locked(device, timelines[i],
+                                     serials[i], abs_timeout_ns);
+         if (result != VK_SUCCESS)
+            break;
+      }
+      pthread_mutex_unlock(&device->mutex);
+      return result;
+   }
+}
+
+/**
+ * anv implementation of vkWaitSemaphores.
+ *
+ * Gathers the timeline payloads named in pWaitInfo (skipping entries whose
+ * wait value is 0) and waits on them for up to `timeout`, which is turned
+ * into an absolute deadline with anv_get_absolute_timeout().  Devices with
+ * has_thread_submit wait on DRM timeline syncobjs; the others wait on the
+ * driver's own anv_timeline objects.
+ *
+ * Returns VK_SUCCESS, VK_TIMEOUT, VK_ERROR_OUT_OF_HOST_MEMORY when the
+ * scratch arrays cannot be allocated, or the result of marking the device
+ * lost when the kernel wait fails for a reason other than ETIME.
+ */
+VkResult anv_WaitSemaphores(
+    VkDevice                                    _device,
+    const VkSemaphoreWaitInfoKHR*               pWaitInfo,
+    uint64_t                                    timeout)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   const uint32_t sem_count = pWaitInfo->semaphoreCount;
+   /* Without the ANY bit every semaphore must reach its value. */
+   const bool wait_all =
+      !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR);
+   uint32_t *handles;
+   struct anv_timeline **timelines;
+   uint64_t *values;
+
+   ANV_MULTIALLOC(ma);
+
+   /* Only the array matching the device's submission model is requested. */
+   anv_multialloc_add(&ma, &values, sem_count);
+   if (device->has_thread_submit) {
+      anv_multialloc_add(&ma, &handles, sem_count);
+   } else {
+      anv_multialloc_add(&ma, &timelines, sem_count);
+   }
+
+   if (!anv_multialloc_alloc(&ma, &device->vk.alloc,
+                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND))
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Compact the waits that matter to the front of the scratch arrays. */
+   uint32_t n_waits = 0;
+   for (uint32_t i = 0; i < sem_count; i++) {
+      const uint64_t wait_value = pWaitInfo->pValues[i];
+
+      /* Entries with a wait value of 0 are left out of the wait. */
+      if (wait_value == 0)
+         continue;
+
+      ANV_FROM_HANDLE(anv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
+      struct anv_semaphore_impl *impl = &semaphore->permanent;
+      if (semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE)
+         impl = &semaphore->temporary;
+
+      if (device->has_thread_submit) {
+         assert(impl->type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE);
+         handles[n_waits] = impl->syncobj;
+      } else {
+         assert(impl->type == ANV_SEMAPHORE_TYPE_TIMELINE);
+         timelines[n_waits] = &impl->timeline;
+      }
+      values[n_waits] = wait_value;
+      n_waits++;
+   }
+
+   VkResult result = VK_SUCCESS;
+   if (n_waits == 0) {
+      /* Every entry was skipped: nothing to wait for. */
+   } else if (device->has_thread_submit) {
+      int ret =
+         anv_gem_syncobj_timeline_wait(device,
+                                       handles, values, n_waits,
+                                       anv_get_absolute_timeout(timeout),
+                                       wait_all,
+                                       false);
+      if (ret != 0) {
+         /* ETIME is an ordinary timeout; anything else loses the device. */
+         if (errno == ETIME)
+            result = VK_TIMEOUT;
+         else
+            result = anv_device_set_lost(device, "unable to wait on timeline syncobj");
+      }
+   } else {
+      result =
+         anv_timelines_wait(device, timelines, values, n_waits,
+                            wait_all,
+                            anv_get_absolute_timeout(timeout));
+   }
+
+   /* values was the first array added to the multialloc; freeing it
+    * presumably releases the whole allocation -- confirm against
+    * anv_multialloc_alloc().
+    */
+   vk_free(&device->vk.alloc, values);
+
+   return result;
+}
+
+/**
+ * anv implementation of vkSignalSemaphore.
+ *
+ * Signals a timeline semaphore from the host with pSignalInfo->value.  The
+ * temporary payload is used when one is set, otherwise the permanent one.
+ *
+ * Returns VK_SUCCESS on success, an error from anv_timeline_gc_locked() or
+ * anv_device_submit_deferred_locked() on the ANV_SEMAPHORE_TYPE_TIMELINE
+ * path, or the result of marking the device lost when the kernel signal
+ * fails on the ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE path.
+ */
+VkResult anv_SignalSemaphore(
+    VkDevice                                    _device,
+    const VkSemaphoreSignalInfoKHR*             pSignalInfo)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   ANV_FROM_HANDLE(anv_semaphore, semaphore, pSignalInfo->semaphore);
+
+   struct anv_semaphore_impl *impl = &semaphore->permanent;
+   if (semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE)
+      impl = &semaphore->temporary;
+
+   switch (impl->type) {
+   case ANV_SEMAPHORE_TYPE_TIMELINE: {
+      struct anv_timeline *timeline = &impl->timeline;
+
+      pthread_mutex_lock(&device->mutex);
+
+      VkResult status = anv_timeline_gc_locked(device, timeline);
+
+      /* The new value must be above anything already pending. */
+      assert(pSignalInfo->value > timeline->highest_pending);
+
+      /* A host signal is complete immediately, so both marks move. */
+      timeline->highest_past = pSignalInfo->value;
+      timeline->highest_pending = pSignalInfo->value;
+
+      /* Deferred submissions are only retried if the GC succeeded. */
+      if (status == VK_SUCCESS)
+         status = anv_device_submit_deferred_locked(device);
+
+      /* Wake every thread sleeping on the submit condition variable,
+       * whether or not the steps above succeeded.
+       */
+      pthread_cond_broadcast(&device->queue_submit);
+      pthread_mutex_unlock(&device->mutex);
+      return status;
+   }
+
+   case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE: {
+      /* Timeline semaphores start out at 0, so there is nothing to do when
+       * asked to signal 0.
+       */
+      if (pSignalInfo->value == 0)
+         return VK_SUCCESS;
+
+      int ret = anv_gem_syncobj_timeline_signal(device, &impl->syncobj,
+                                                &pSignalInfo->value, 1);
+      if (ret == 0)
+         return VK_SUCCESS;
+
+      return anv_device_set_lost(device, "unable to signal timeline syncobj");
+   }
+
+   default:
+      unreachable("Invalid semaphore type");
+   }
+}