anv: implement shareable timeline semaphores
[mesa.git] / src / intel / vulkan / anv_queue.c
index 7ba64af54c46d580c9fe8fd765f4852bcfaf84d4..fdf10f2c01266dc6013ccb90f2ec1994a34fb31a 100644 (file)
@@ -95,11 +95,16 @@ anv_queue_submit_free(struct anv_device *device,
    for (uint32_t i = 0; i < submit->sync_fd_semaphore_count; i++)
       anv_semaphore_unref(device, submit->sync_fd_semaphores[i]);
    /* Execbuf does not consume the in_fence.  It's our job to close it. */
-   if (submit->in_fence != -1)
+   if (submit->in_fence != -1) {
+      assert(!device->has_thread_submit);
       close(submit->in_fence);
-   if (submit->out_fence != -1)
+   }
+   if (submit->out_fence != -1) {
+      assert(!device->has_thread_submit);
       close(submit->out_fence);
+   }
    vk_free(alloc, submit->fences);
+   vk_free(alloc, submit->fence_values);
    vk_free(alloc, submit->temporary_semaphores);
    vk_free(alloc, submit->wait_timelines);
    vk_free(alloc, submit->wait_timeline_values);
@@ -349,6 +354,98 @@ anv_device_submit_deferred_locked(struct anv_device *device)
    return anv_queue_submit_deferred_locked(&device->queue, &advance);
 }
 
+static void
+anv_queue_submit_signal_fences(struct anv_device *device,
+                               struct anv_queue_submit *submit)
+{
+   for (uint32_t i = 0; i < submit->fence_count; i++) {
+      if (submit->fences[i].flags & I915_EXEC_FENCE_SIGNAL) {
+         anv_gem_syncobj_timeline_signal(device, &submit->fences[i].handle,
+                                         &submit->fence_values[i], 1);
+      }
+   }
+}
+
+static void *
+anv_queue_task(void *_queue)
+{
+   struct anv_queue *queue = _queue;
+
+   pthread_mutex_lock(&queue->mutex);
+
+   while (!queue->quit) {
+      while (!list_is_empty(&queue->queued_submits)) {
+         struct anv_queue_submit *submit =
+            list_first_entry(&queue->queued_submits, struct anv_queue_submit, link);
+         list_del(&submit->link);
+
+         pthread_mutex_unlock(&queue->mutex);
+
+         VkResult result = VK_ERROR_DEVICE_LOST;
+
+         /* Wait for timeline points to materialize before submitting. We need
+          * to do this because we're using threads to do the submit to i915.
+          * We could end up in a situation where the application submits to 2
+          * queues with the first submit creating the dma-fence for the
+          * second. But because the scheduling of the submission threads might
+          * wakeup the second queue thread first, this would make that execbuf
+          * fail because the dma-fence it depends on hasn't materialized yet.
+          */
+         if (!queue->lost && submit->wait_timeline_count > 0) {
+            int ret = queue->device->no_hw ? 0 :
+               anv_gem_syncobj_timeline_wait(
+                  queue->device, submit->wait_timeline_syncobjs,
+                  submit->wait_timeline_values, submit->wait_timeline_count,
+                  anv_get_absolute_timeout(UINT64_MAX) /* wait forever */,
+                  true /* wait for all */, true /* wait for materialize */);
+            if (ret) {
+               result = anv_queue_set_lost(queue, "timeline timeout: %s",
+                                           strerror(errno));
+            }
+         }
+
+         /* Now submit */
+         if (!queue->lost) {
+            pthread_mutex_lock(&queue->device->mutex);
+            result = anv_queue_execbuf_locked(queue, submit);
+            pthread_mutex_unlock(&queue->device->mutex);
+         }
+
+         for (uint32_t i = 0; i < submit->sync_fd_semaphore_count; i++) {
+            struct anv_semaphore *semaphore = submit->sync_fd_semaphores[i];
+            /* Out fences can't have temporary state because that would imply
+             * that we imported a sync file and are trying to signal it.
+             */
+            assert(semaphore->temporary.type == ANV_SEMAPHORE_TYPE_NONE);
+            struct anv_semaphore_impl *impl = &semaphore->permanent;
+
+            assert(impl->type == ANV_SEMAPHORE_TYPE_SYNC_FILE);
+            impl->fd = dup(submit->out_fence);
+         }
+
+         if (result != VK_SUCCESS) {
+            /* vkQueueSubmit or some other entry point will report the
+             * DEVICE_LOST error at some point, but until we have emptied our
+             * list of execbufs we need to wake up all potential the waiters
+             * until one of them spots the error.
+             */
+            anv_queue_submit_signal_fences(queue->device, submit);
+         }
+
+         anv_queue_submit_free(queue->device, submit);
+
+         pthread_mutex_lock(&queue->mutex);
+      }
+
+      if (!queue->quit)
+         pthread_cond_wait(&queue->cond, &queue->mutex);
+   }
+
+   pthread_mutex_unlock(&queue->mutex);
+
+   return NULL;
+}
+
 static VkResult
 _anv_queue_submit(struct anv_queue *queue, struct anv_queue_submit **_submit,
                   bool flush_queue)
@@ -360,42 +457,92 @@ _anv_queue_submit(struct anv_queue *queue, struct anv_queue_submit **_submit,
     * anv_queue.
     */
    *_submit = NULL;
+   if (queue->device->has_thread_submit) {
+      pthread_mutex_lock(&queue->mutex);
+      pthread_cond_broadcast(&queue->cond);
+      list_addtail(&submit->link, &queue->queued_submits);
+      pthread_mutex_unlock(&queue->mutex);
+      return VK_SUCCESS;
+   } else {
+      pthread_mutex_lock(&queue->device->mutex);
+      list_addtail(&submit->link, &queue->queued_submits);
+      VkResult result = anv_device_submit_deferred_locked(queue->device);
+      if (flush_queue) {
+         while (result == VK_SUCCESS && !list_is_empty(&queue->queued_submits)) {
+            int ret = pthread_cond_wait(&queue->device->queue_submit,
+                                        &queue->device->mutex);
+            if (ret != 0) {
+               result = anv_device_set_lost(queue->device, "wait timeout");
+               break;
+            }
 
-   pthread_mutex_lock(&queue->device->mutex);
-   list_addtail(&submit->link, &queue->queued_submits);
-   VkResult result = anv_device_submit_deferred_locked(queue->device);
-   if (flush_queue) {
-      while (result == VK_SUCCESS && !list_is_empty(&queue->queued_submits)) {
-         int ret = pthread_cond_wait(&queue->device->queue_submit,
-                                     &queue->device->mutex);
-         if (ret != 0) {
-            result = anv_device_set_lost(queue->device, "wait timeout");
-            break;
+            result = anv_device_submit_deferred_locked(queue->device);
          }
-
-         result = anv_device_submit_deferred_locked(queue->device);
       }
+      pthread_mutex_unlock(&queue->device->mutex);
+      return result;
    }
-   pthread_mutex_unlock(&queue->device->mutex);
-   return result;
 }
 
 VkResult
 anv_queue_init(struct anv_device *device, struct anv_queue *queue)
 {
-   vk_object_base_init(&device->vk, &queue->base, VK_OBJECT_TYPE_QUEUE);
+   VkResult result;
+
    queue->device = device;
    queue->flags = 0;
+   queue->lost = false;
+   queue->quit = false;
 
    list_inithead(&queue->queued_submits);
 
+   /* We only need those additional thread/mutex when using a thread for
+    * submission.
+    */
+   if (device->has_thread_submit) {
+      if (pthread_mutex_init(&queue->mutex, NULL) != 0)
+         return vk_error(VK_ERROR_INITIALIZATION_FAILED);
+
+      if (pthread_cond_init(&queue->cond, NULL) != 0) {
+         result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
+         goto fail_mutex;
+      }
+      if (pthread_create(&queue->thread, NULL, anv_queue_task, queue)) {
+         result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
+         goto fail_cond;
+      }
+   }
+
+   vk_object_base_init(&device->vk, &queue->base, VK_OBJECT_TYPE_QUEUE);
+
    return VK_SUCCESS;
+
+ fail_cond:
+   pthread_cond_destroy(&queue->cond);
+ fail_mutex:
+   pthread_mutex_destroy(&queue->mutex);
+
+   return result;
 }
 
 void
 anv_queue_finish(struct anv_queue *queue)
 {
    vk_object_base_finish(&queue->base);
+
+   if (!queue->device->has_thread_submit)
+      return;
+
+   pthread_mutex_lock(&queue->mutex);
+   pthread_cond_broadcast(&queue->cond);
+   queue->quit = true;
+   pthread_mutex_unlock(&queue->mutex);
+
+   void *ret;
+   pthread_join(queue->thread, &ret);
+
+   pthread_cond_destroy(&queue->cond);
+   pthread_mutex_destroy(&queue->mutex);
 }
 
 static VkResult
@@ -405,14 +552,14 @@ anv_queue_submit_add_fence_bo(struct anv_queue_submit *submit,
 {
    if (submit->fence_bo_count >= submit->fence_bo_array_length) {
       uint32_t new_len = MAX2(submit->fence_bo_array_length * 2, 64);
-
-      submit->fence_bos =
+      uintptr_t *new_fence_bos =
          vk_realloc(submit->alloc,
                     submit->fence_bos, new_len * sizeof(*submit->fence_bos),
                     8, submit->alloc_scope);
-      if (submit->fence_bos == NULL)
+      if (new_fence_bos == NULL)
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
+      submit->fence_bos = new_fence_bos;
       submit->fence_bo_array_length = new_len;
    }
 
@@ -427,27 +574,70 @@ anv_queue_submit_add_fence_bo(struct anv_queue_submit *submit,
 static VkResult
 anv_queue_submit_add_syncobj(struct anv_queue_submit* submit,
                              struct anv_device *device,
-                             uint32_t handle, uint32_t flags)
+                             uint32_t handle, uint32_t flags,
+                             uint64_t value)
 {
    assert(flags != 0);
 
+   if (device->has_thread_submit && (flags & I915_EXEC_FENCE_WAIT)) {
+      if (submit->wait_timeline_count >= submit->wait_timeline_array_length) {
+         uint32_t new_len = MAX2(submit->wait_timeline_array_length * 2, 64);
+
+         uint32_t *new_wait_timeline_syncobjs =
+            vk_realloc(submit->alloc,
+                       submit->wait_timeline_syncobjs,
+                       new_len * sizeof(*submit->wait_timeline_syncobjs),
+                       8, submit->alloc_scope);
+         if (new_wait_timeline_syncobjs == NULL)
+            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+         submit->wait_timeline_syncobjs = new_wait_timeline_syncobjs;
+
+         uint64_t *new_wait_timeline_values =
+            vk_realloc(submit->alloc,
+                       submit->wait_timeline_values, new_len * sizeof(*submit->wait_timeline_values),
+                       8, submit->alloc_scope);
+         if (new_wait_timeline_values == NULL)
+            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+         submit->wait_timeline_values = new_wait_timeline_values;
+         submit->wait_timeline_array_length = new_len;
+      }
+
+      submit->wait_timeline_syncobjs[submit->wait_timeline_count] = handle;
+      submit->wait_timeline_values[submit->wait_timeline_count] = value;
+
+      submit->wait_timeline_count++;
+   }
+
    if (submit->fence_count >= submit->fence_array_length) {
       uint32_t new_len = MAX2(submit->fence_array_length * 2, 64);
-
-      submit->fences =
+      struct drm_i915_gem_exec_fence *new_fences =
          vk_realloc(submit->alloc,
                     submit->fences, new_len * sizeof(*submit->fences),
                     8, submit->alloc_scope);
-      if (submit->fences == NULL)
+      if (new_fences == NULL)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+      submit->fences = new_fences;
+
+      uint64_t *new_fence_values =
+         vk_realloc(submit->alloc,
+                    submit->fence_values, new_len * sizeof(*submit->fence_values),
+                    8, submit->alloc_scope);
+      if (new_fence_values == NULL)
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
+      submit->fence_values = new_fence_values;
       submit->fence_array_length = new_len;
    }
 
-   submit->fences[submit->fence_count++] = (struct drm_i915_gem_exec_fence) {
+   submit->fences[submit->fence_count] = (struct drm_i915_gem_exec_fence) {
       .handle = handle,
       .flags = flags,
    };
+   submit->fence_values[submit->fence_count] = value;
+   submit->fence_count++;
 
    return VK_SUCCESS;
 }
@@ -483,21 +673,24 @@ anv_queue_submit_add_timeline_wait(struct anv_queue_submit* submit,
 {
    if (submit->wait_timeline_count >= submit->wait_timeline_array_length) {
       uint32_t new_len = MAX2(submit->wait_timeline_array_length * 2, 64);
-
-      submit->wait_timelines =
+      struct anv_timeline **new_wait_timelines =
          vk_realloc(submit->alloc,
                     submit->wait_timelines, new_len * sizeof(*submit->wait_timelines),
                     8, submit->alloc_scope);
-      if (submit->wait_timelines == NULL)
+      if (new_wait_timelines == NULL)
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
-      submit->wait_timeline_values =
+      submit->wait_timelines = new_wait_timelines;
+
+      uint64_t *new_wait_timeline_values =
          vk_realloc(submit->alloc,
                     submit->wait_timeline_values, new_len * sizeof(*submit->wait_timeline_values),
                     8, submit->alloc_scope);
-      if (submit->wait_timeline_values == NULL)
+      if (new_wait_timeline_values == NULL)
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
+      submit->wait_timeline_values = new_wait_timeline_values;
+
       submit->wait_timeline_array_length = new_len;
    }
 
@@ -519,21 +712,24 @@ anv_queue_submit_add_timeline_signal(struct anv_queue_submit* submit,
 
    if (submit->signal_timeline_count >= submit->signal_timeline_array_length) {
       uint32_t new_len = MAX2(submit->signal_timeline_array_length * 2, 64);
-
-      submit->signal_timelines =
+      struct anv_timeline **new_signal_timelines =
          vk_realloc(submit->alloc,
                     submit->signal_timelines, new_len * sizeof(*submit->signal_timelines),
                     8, submit->alloc_scope);
-      if (submit->signal_timelines == NULL)
+      if (new_signal_timelines == NULL)
             return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
-      submit->signal_timeline_values =
+      submit->signal_timelines = new_signal_timelines;
+
+      uint64_t *new_signal_timeline_values =
          vk_realloc(submit->alloc,
                     submit->signal_timeline_values, new_len * sizeof(*submit->signal_timeline_values),
                     8, submit->alloc_scope);
-      if (submit->signal_timeline_values == NULL)
+      if (new_signal_timeline_values == NULL)
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
+      submit->signal_timeline_values = new_signal_timeline_values;
+
       submit->signal_timeline_array_length = new_len;
    }
 
@@ -589,7 +785,7 @@ anv_queue_submit_simple_batch(struct anv_queue *queue,
       }
 
       result = anv_queue_submit_add_syncobj(submit, device, syncobj,
-                                            I915_EXEC_FENCE_SIGNAL);
+                                            I915_EXEC_FENCE_SIGNAL, 0);
    } else {
       result = anv_device_alloc_bo(device, 4096,
                                    ANV_BO_ALLOC_EXTERNAL |
@@ -736,7 +932,6 @@ anv_queue_submit(struct anv_queue *queue,
    submit->cmd_buffer = cmd_buffer;
 
    VkResult result = VK_SUCCESS;
-
    for (uint32_t i = 0; i < num_in_semaphores; i++) {
       ANV_FROM_HANDLE(anv_semaphore, semaphore, in_semaphores[i]);
       struct anv_semaphore_impl *impl;
@@ -790,7 +985,8 @@ anv_queue_submit(struct anv_queue *queue,
       case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ: {
          result = anv_queue_submit_add_syncobj(submit, device,
                                                impl->syncobj,
-                                               I915_EXEC_FENCE_WAIT);
+                                               I915_EXEC_FENCE_WAIT,
+                                               0);
          if (result != VK_SUCCESS)
             goto error;
          break;
@@ -804,6 +1000,15 @@ anv_queue_submit(struct anv_queue *queue,
             goto error;
          break;
 
+      case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
+         result = anv_queue_submit_add_syncobj(submit, device,
+                                               impl->syncobj,
+                                               I915_EXEC_FENCE_WAIT,
+                                               in_values ? in_values[i] : 0);
+         if (result != VK_SUCCESS)
+            goto error;
+         break;
+
       default:
          break;
       }
@@ -844,7 +1049,8 @@ anv_queue_submit(struct anv_queue *queue,
 
       case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ: {
          result = anv_queue_submit_add_syncobj(submit, device, impl->syncobj,
-                                               I915_EXEC_FENCE_SIGNAL);
+                                               I915_EXEC_FENCE_SIGNAL,
+                                               0);
          if (result != VK_SUCCESS)
             goto error;
          break;
@@ -858,6 +1064,14 @@ anv_queue_submit(struct anv_queue *queue,
             goto error;
          break;
 
+      case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
+         result = anv_queue_submit_add_syncobj(submit, device, impl->syncobj,
+                                               I915_EXEC_FENCE_SIGNAL,
+                                               out_values ? out_values[i] : 0);
+         if (result != VK_SUCCESS)
+            goto error;
+         break;
+
       default:
          break;
       }
@@ -887,6 +1101,7 @@ anv_queue_submit(struct anv_queue *queue,
 
       switch (impl->type) {
       case ANV_FENCE_TYPE_BO:
+         assert(!device->has_thread_submit);
          result = anv_queue_submit_add_fence_bo(submit, impl->bo.bo, true /* signal */);
          if (result != VK_SUCCESS)
             goto error;
@@ -898,8 +1113,11 @@ anv_queue_submit(struct anv_queue *queue,
           * also reset the fence's syncobj so that they don't contain a
           * signaled dma-fence.
           */
+         anv_gem_syncobj_reset(device, impl->syncobj);
+
          result = anv_queue_submit_add_syncobj(submit, device, impl->syncobj,
-                                               I915_EXEC_FENCE_SIGNAL);
+                                               I915_EXEC_FENCE_SIGNAL,
+                                               0);
          if (result != VK_SUCCESS)
             goto error;
          break;
@@ -915,6 +1133,7 @@ anv_queue_submit(struct anv_queue *queue,
       goto error;
 
    if (fence && fence->permanent.type == ANV_FENCE_TYPE_BO) {
+      assert(!device->has_thread_submit);
       /* If we have permanent BO fence, the only type of temporary possible
        * would be BO_WSI (because BO fences are not shareable). The Vulkan spec
        * also requires that the fence passed to vkQueueSubmit() be :
@@ -1285,16 +1504,34 @@ VkResult anv_GetFenceStatus(
       }
 
    case ANV_FENCE_TYPE_SYNCOBJ: {
-      int ret = anv_gem_syncobj_wait(device, &impl->syncobj, 1, 0, true);
-      if (ret == -1) {
-         if (errno == ETIME) {
-            return VK_NOT_READY;
+      if (device->has_thread_submit) {
+         uint64_t binary_value = 0;
+         int ret = anv_gem_syncobj_timeline_wait(device, &impl->syncobj,
+                                             &binary_value, 1, 0,
+                                             true /* wait_all */,
+                                             false /* wait_materialize */);
+         if (ret == -1) {
+            if (errno == ETIME) {
+               return VK_NOT_READY;
+            } else {
+               /* We don't know the real error. */
+               return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
+            }
          } else {
-            /* We don't know the real error. */
-            return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
+            return VK_SUCCESS;
          }
       } else {
-         return VK_SUCCESS;
+         int ret = anv_gem_syncobj_wait(device, &impl->syncobj, 1, 0, false);
+         if (ret == -1) {
+            if (errno == ETIME) {
+               return VK_NOT_READY;
+            } else {
+               /* We don't know the real error. */
+               return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
+            }
+         } else {
+            return VK_SUCCESS;
+         }
       }
    }
 
@@ -1328,11 +1565,11 @@ anv_wait_for_syncobj_fences(struct anv_device *device,
       syncobjs[i] = impl->syncobj;
    }
 
+   int ret = 0;
    /* The gem_syncobj_wait ioctl may return early due to an inherent
-    * limitation in the way it computes timeouts.  Loop until we've actually
+    * limitation in the way it computes timeouts. Loop until we've actually
     * passed the timeout.
     */
-   int ret;
    do {
       ret = anv_gem_syncobj_wait(device, syncobjs, fenceCount,
                                  abs_timeout_ns, waitAll);
@@ -1490,6 +1727,8 @@ anv_wait_for_fences(struct anv_device *device,
 
          switch (impl->type) {
          case ANV_FENCE_TYPE_BO:
+            assert(!device->physical->has_syncobj_wait);
+            /* fall-through */
          case ANV_FENCE_TYPE_WSI_BO:
             result = anv_wait_for_bo_fences(device, 1, &pFences[i],
                                             true, abs_timeout);
@@ -1689,6 +1928,31 @@ VkResult anv_ImportFenceFdKHR(
    return VK_SUCCESS;
 }
 
+/* The sideband payload of the DRM syncobj was incremented when the
+ * application called vkQueueSubmit(). Here we wait for a fence with the same
+ * value to materialize so that we can exporting (typically as a SyncFD).
+ */
+static VkResult
+wait_syncobj_materialize(struct anv_device *device,
+                         uint32_t syncobj,
+                         int *fd)
+{
+   if (!device->has_thread_submit)
+      return VK_SUCCESS;
+
+   uint64_t binary_value = 0;
+   /* We might need to wait until the fence materializes before we can
+    * export to a sync FD when we use a thread for submission.
+    */
+   if (anv_gem_syncobj_timeline_wait(device, &syncobj, &binary_value, 1,
+                                     anv_get_absolute_timeout(5ull * NSEC_PER_SEC),
+                                     true /* wait_all */,
+                                     true /* wait_materialize */))
+      return anv_device_set_lost(device, "anv_gem_syncobj_timeline_wait failed: %m");
+
+   return VK_SUCCESS;
+}
+
 VkResult anv_GetFenceFdKHR(
     VkDevice                                    _device,
     const VkFenceGetFdInfoKHR*                  pGetFdInfo,
@@ -1715,6 +1979,10 @@ VkResult anv_GetFenceFdKHR(
    }
 
    case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: {
+      VkResult result = wait_syncobj_materialize(device, impl->syncobj, pFd);
+      if (result != VK_SUCCESS)
+         return result;
+
       int fd = anv_gem_syncobj_export_sync_file(device, impl->syncobj);
       if (fd < 0)
          return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
@@ -1788,8 +2056,24 @@ timeline_semaphore_create(struct anv_device *device,
                           struct anv_semaphore_impl *impl,
                           uint64_t initial_value)
 {
-   impl->type = ANV_SEMAPHORE_TYPE_TIMELINE;
-   anv_timeline_init(device, &impl->timeline, initial_value);
+   if (device->has_thread_submit) {
+      impl->type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE;
+      impl->syncobj = anv_gem_syncobj_create(device, 0);
+      if (!impl->syncobj)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      if (initial_value) {
+         if (anv_gem_syncobj_timeline_signal(device,
+                                             &impl->syncobj,
+                                             &initial_value, 1)) {
+            anv_gem_syncobj_destroy(device, impl->syncobj);
+            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+         }
+      }
+   } else {
+      impl->type = ANV_SEMAPHORE_TYPE_TIMELINE;
+      anv_timeline_init(device, &impl->timeline, initial_value);
+   }
+
    return VK_SUCCESS;
 }
 
@@ -1818,7 +2102,7 @@ VkResult anv_CreateSemaphore(
 
    const VkExportSemaphoreCreateInfo *export =
       vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO);
-    VkExternalSemaphoreHandleTypeFlags handleTypes =
+   VkExternalSemaphoreHandleTypeFlags handleTypes =
       export ? export->handleTypes : 0;
    VkResult result;
 
@@ -1833,8 +2117,10 @@ VkResult anv_CreateSemaphore(
       }
    } else if (handleTypes & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
       assert(handleTypes == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
-      assert(sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR);
-      result = binary_semaphore_create(device, &semaphore->permanent, true);
+      if (sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR)
+         result = binary_semaphore_create(device, &semaphore->permanent, true);
+      else
+         result = timeline_semaphore_create(device, &semaphore->permanent, timeline_value);
       if (result != VK_SUCCESS) {
          vk_free2(&device->vk.alloc, pAllocator, semaphore);
          return result;
@@ -1891,6 +2177,7 @@ anv_semaphore_impl_cleanup(struct anv_device *device,
       break;
 
    case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
+   case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
       anv_gem_syncobj_destroy(device, impl->syncobj);
       break;
 
@@ -1958,8 +2245,10 @@ void anv_GetPhysicalDeviceExternalSemaphoreProperties(
 
    switch (pExternalSemaphoreInfo->handleType) {
    case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
-      /* Timeline semaphores are not exportable. */
-      if (sem_type == VK_SEMAPHORE_TYPE_TIMELINE_KHR)
+      /* Timeline semaphores are not exportable, unless we have threaded
+       * submission.
+       */
+      if (sem_type == VK_SEMAPHORE_TYPE_TIMELINE_KHR && !device->has_thread_submit)
          break;
       pExternalSemaphoreProperties->exportFromImportedHandleTypes =
          VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
@@ -2008,7 +2297,15 @@ VkResult anv_ImportSemaphoreFdKHR(
    switch (pImportSemaphoreFdInfo->handleType) {
    case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
       if (device->physical->has_syncobj) {
-         new_impl.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
+         /* When importing non temporarily, reuse the semaphore's existing
+          * type. The Linux/DRM implementation allows to interchangeably use
+          * binary & timeline semaphores and we have no way to differenciate
+          * them.
+          */
+         if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT)
+            new_impl.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
+         else
+            new_impl.type = semaphore->permanent.type;
 
          new_impl.syncobj = anv_gem_syncobj_fd_to_handle(device, fd);
          if (!new_impl.syncobj)
@@ -2049,22 +2346,31 @@ VkResult anv_ImportSemaphoreFdKHR(
 
    case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
       if (device->physical->has_syncobj) {
+         uint32_t create_flags = 0;
+
+         if (fd == -1)
+            create_flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
+
          new_impl = (struct anv_semaphore_impl) {
             .type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ,
-            .syncobj = anv_gem_syncobj_create(device, 0),
+            .syncobj = anv_gem_syncobj_create(device, create_flags),
          };
+
          if (!new_impl.syncobj)
             return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-         if (anv_gem_syncobj_import_sync_file(device, new_impl.syncobj, fd)) {
-            anv_gem_syncobj_destroy(device, new_impl.syncobj);
-            return vk_errorf(device, NULL, VK_ERROR_INVALID_EXTERNAL_HANDLE,
-                             "syncobj sync file import failed: %m");
+
+         if (fd != -1) {
+            if (anv_gem_syncobj_import_sync_file(device, new_impl.syncobj, fd)) {
+               anv_gem_syncobj_destroy(device, new_impl.syncobj);
+               return vk_errorf(device, NULL, VK_ERROR_INVALID_EXTERNAL_HANDLE,
+                                "syncobj sync file import failed: %m");
+            }
+            /* Ownership of the FD is transfered to Anv. Since we don't need it
+             * anymore because the associated fence has been put into a syncobj,
+             * we must close the FD.
+             */
+            close(fd);
          }
-         /* Ownership of the FD is transfered to Anv. Since we don't need it
-          * anymore because the associated fence has been put into a syncobj,
-          * we must close the FD.
-          */
-         close(fd);
       } else {
          new_impl = (struct anv_semaphore_impl) {
             .type = ANV_SEMAPHORE_TYPE_SYNC_FILE,
@@ -2153,9 +2459,13 @@ VkResult anv_GetSemaphoreFdKHR(
    }
 
    case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
-      if (pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT)
+      if (pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
+         VkResult result = wait_syncobj_materialize(device, impl->syncobj, pFd);
+         if (result != VK_SUCCESS)
+            return result;
+
          fd = anv_gem_syncobj_export_sync_file(device, impl->syncobj);
-      else {
+      else {
          assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
          fd = anv_gem_syncobj_handle_to_fd(device, impl->syncobj);
       }
@@ -2164,6 +2474,14 @@ VkResult anv_GetSemaphoreFdKHR(
       *pFd = fd;
       break;
 
+   case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
+      assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
+      fd = anv_gem_syncobj_handle_to_fd(device, impl->syncobj);
+      if (fd < 0)
+         return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
+      *pFd = fd;
+      break;
+
    default:
       return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
    }
@@ -2202,6 +2520,15 @@ VkResult anv_GetSemaphoreCounterValue(
       return VK_SUCCESS;
    }
 
+   case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE: {
+      int ret = anv_gem_syncobj_timeline_query(device, &impl->syncobj, pValue, 1);
+
+      if (ret != 0)
+         return anv_device_set_lost(device, "unable to query timeline syncobj");
+
+      return VK_SUCCESS;
+   }
+
    default:
       unreachable("Invalid semaphore type");
    }
@@ -2221,8 +2548,8 @@ anv_timeline_wait_locked(struct anv_device *device,
          .tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
       };
 
-      int ret = pthread_cond_timedwait(&device->queue_submit,
-                                       &device->mutex, &abstime);
+      UNUSED int ret = pthread_cond_timedwait(&device->queue_submit,
+                                              &device->mutex, &abstime);
       assert(ret != EINVAL);
       if (anv_gettime_ns() >= abs_timeout_ns &&
           timeline->highest_pending < serial)
@@ -2321,24 +2648,22 @@ VkResult anv_WaitSemaphores(
     uint64_t                                    timeout)
 {
    ANV_FROM_HANDLE(anv_device, device, _device);
+   uint32_t *handles;
+   struct anv_timeline **timelines;
+   uint64_t *values;
 
-   if (device->no_hw)
-      return VK_SUCCESS;
+   ANV_MULTIALLOC(ma);
 
-   struct anv_timeline **timelines =
-      vk_alloc(&device->vk.alloc,
-               pWaitInfo->semaphoreCount * sizeof(*timelines),
-               8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
-   if (!timelines)
-      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+   anv_multialloc_add(&ma, &values, pWaitInfo->semaphoreCount);
+   if (device->has_thread_submit) {
+      anv_multialloc_add(&ma, &handles, pWaitInfo->semaphoreCount);
+   } else {
+      anv_multialloc_add(&ma, &timelines, pWaitInfo->semaphoreCount);
+   }
 
-   uint64_t *values = vk_alloc(&device->vk.alloc,
-                               pWaitInfo->semaphoreCount * sizeof(*values),
-                               8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
-   if (!values) {
-      vk_free(&device->vk.alloc, timelines);
+   if (!anv_multialloc_alloc(&ma, &device->vk.alloc,
+                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND))
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-   }
 
    uint32_t handle_count = 0;
    for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; i++) {
@@ -2347,24 +2672,40 @@ VkResult anv_WaitSemaphores(
          semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
          &semaphore->temporary : &semaphore->permanent;
 
-      assert(impl->type == ANV_SEMAPHORE_TYPE_TIMELINE);
-
       if (pWaitInfo->pValues[i] == 0)
          continue;
 
-      timelines[handle_count] = &impl->timeline;
+      if (device->has_thread_submit) {
+         assert(impl->type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE);
+         handles[handle_count] = impl->syncobj;
+      } else {
+         assert(impl->type == ANV_SEMAPHORE_TYPE_TIMELINE);
+         timelines[handle_count] = &impl->timeline;
+      }
       values[handle_count] = pWaitInfo->pValues[i];
       handle_count++;
    }
 
    VkResult result = VK_SUCCESS;
    if (handle_count > 0) {
-      result = anv_timelines_wait(device, timelines, values, handle_count,
-                                  !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR),
-                                  anv_get_absolute_timeout(timeout));
+      if (device->has_thread_submit) {
+         int ret =
+            anv_gem_syncobj_timeline_wait(device,
+                                          handles, values, handle_count,
+                                          anv_get_absolute_timeout(timeout),
+                                          !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR),
+                                          false);
+         if (ret != 0)
+            result = errno == ETIME ? VK_TIMEOUT :
+               anv_device_set_lost(device, "unable to wait on timeline syncobj");
+      } else {
+         result =
+            anv_timelines_wait(device, timelines, values, handle_count,
+                               !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR),
+                               anv_get_absolute_timeout(timeout));
+      }
    }
 
-   vk_free(&device->vk.alloc, timelines);
    vk_free(&device->vk.alloc, values);
 
    return result;
@@ -2399,6 +2740,20 @@ VkResult anv_SignalSemaphore(
       return result;
    }
 
+   case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE: {
+      /* Timeline semaphores are created with a value of 0, so signaling on 0
+       * is a waste of time.
+       */
+      if (pSignalInfo->value == 0)
+         return VK_SUCCESS;
+
+      int ret = anv_gem_syncobj_timeline_signal(device, &impl->syncobj,
+                                                &pSignalInfo->value, 1);
+
+      return ret == 0 ? VK_SUCCESS :
+         anv_device_set_lost(device, "unable to signal timeline syncobj");
+   }
+
    default:
       unreachable("Invalid semaphore type");
    }