* This file implements VkQueue, VkFence, and VkSemaphore
*/
+#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
+#include "util/os_file.h"
+
#include "anv_private.h"
#include "vk_util.h"
for (uint32_t i = 0; i < submit->sync_fd_semaphore_count; i++)
anv_semaphore_unref(device, submit->sync_fd_semaphores[i]);
/* Execbuf does not consume the in_fence. It's our job to close it. */
- if (submit->in_fence != -1)
+ if (submit->in_fence != -1) {
+ assert(!device->has_thread_submit);
close(submit->in_fence);
- if (submit->out_fence != -1)
+ }
+ if (submit->out_fence != -1) {
+ assert(!device->has_thread_submit);
close(submit->out_fence);
+ }
vk_free(alloc, submit->fences);
+ vk_free(alloc, submit->fence_values);
vk_free(alloc, submit->temporary_semaphores);
+ vk_free(alloc, submit->wait_timelines);
+ vk_free(alloc, submit->wait_timeline_values);
+ vk_free(alloc, submit->signal_timelines);
+ vk_free(alloc, submit->signal_timeline_values);
vk_free(alloc, submit->fence_bos);
vk_free(alloc, submit);
}
+/* Tells whether a queued submit can be handed to the kernel yet.
+ *
+ * Returns false as soon as one of the submit's timeline waits asks for a
+ * value greater than the corresponding timeline's highest_pending, and true
+ * otherwise (including when the submit has no timeline waits at all).
+ *
+ * NOTE(review): the _locked suffix suggests the caller already holds the
+ * lock protecting the timelines -- confirm which mutex that is.
+ */
+static bool
+anv_queue_submit_ready_locked(struct anv_queue_submit *submit)
+{
+ for (uint32_t i = 0; i < submit->wait_timeline_count; i++) {
+ if (submit->wait_timeline_values[i] > submit->wait_timelines[i]->highest_pending)
+ return false;
+ }
+
+ return true;
+}
+
+/* Initializes a timeline.
+ *
+ * Both highest_past and highest_pending start at initial_value, and the
+ * points and free_points lists start empty. The device argument is not
+ * used here and the function always returns VK_SUCCESS.
+ */
static VkResult
-_anv_queue_submit(struct anv_queue *queue, struct anv_queue_submit **_submit)
+anv_timeline_init(struct anv_device *device,
+ struct anv_timeline *timeline,
+ uint64_t initial_value)
{
- struct anv_queue_submit *submit = *_submit;
- VkResult result = anv_queue_execbuf(queue, submit);
+ timeline->highest_past =
+ timeline->highest_pending = initial_value;
+ list_inithead(&timeline->points);
+ list_inithead(&timeline->free_points);
+
+ return VK_SUCCESS;
+}
+
+/* Tears down a timeline.
+ *
+ * Every point still linked on free_points, then every point on points, is
+ * unlinked, has its BO released with anv_device_release_bo() and is freed
+ * with the device allocator. The timeline structure itself is not freed.
+ */
+static void
+anv_timeline_finish(struct anv_device *device,
+ struct anv_timeline *timeline)
+{
+ /* The _safe iterator is required because each entry is freed in the loop. */
+ list_for_each_entry_safe(struct anv_timeline_point, point,
+ &timeline->free_points, link) {
+ list_del(&point->link);
+ anv_device_release_bo(device, point->bo);
+ vk_free(&device->vk.alloc, point);
+ }
+ list_for_each_entry_safe(struct anv_timeline_point, point,
+ &timeline->points, link) {
+ list_del(&point->link);
+ anv_device_release_bo(device, point->bo);
+ vk_free(&device->vk.alloc, point);
+ }
+}
+
+/* Obtains a time point for `value` and appends it to timeline->points.
+ *
+ * If free_points has an entry, the first one is unlinked and reused (its BO
+ * is kept). Otherwise a zeroed point is allocated together with a 4096-byte
+ * BO created with ANV_BO_ALLOC_EXTERNAL | ANV_BO_ALLOC_IMPLICIT_SYNC.
+ *
+ * Returns VK_SUCCESS with *point set to the point now on the timeline,
+ * VK_ERROR_OUT_OF_HOST_MEMORY if the point cannot be allocated, or the
+ * error from anv_device_alloc_bo(). On failure nothing is added to the
+ * timeline; *point must not be used (after a BO allocation failure it still
+ * holds the address of the already freed point).
+ */
+static VkResult
+anv_timeline_add_point_locked(struct anv_device *device,
+ struct anv_timeline *timeline,
+ uint64_t value,
+ struct anv_timeline_point **point)
+{
+ VkResult result = VK_SUCCESS;
+
+ if (list_is_empty(&timeline->free_points)) {
+ *point =
+ vk_zalloc(&device->vk.alloc, sizeof(**point),
+ 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+ if (!(*point))
+ result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ if (result == VK_SUCCESS) {
+ result = anv_device_alloc_bo(device, 4096,
+ ANV_BO_ALLOC_EXTERNAL |
+ ANV_BO_ALLOC_IMPLICIT_SYNC,
+ 0 /* explicit_address */,
+ &(*point)->bo);
+ /* Don't leak the freshly allocated point if it cannot get a BO. */
+ if (result != VK_SUCCESS)
+ vk_free(&device->vk.alloc, *point);
+ }
+ } else {
+ *point = list_first_entry(&timeline->free_points,
+ struct anv_timeline_point, link);
+ list_del(&(*point)->link);
+ }
 if (result == VK_SUCCESS) {
+ (*point)->serial = value;
+ list_addtail(&(*point)->link, &timeline->points);
+ }
+
+ return result;
+}
+
+/* Recycles the time points of a timeline that have completed.
+ *
+ * Walks timeline->points from the head and stops at the first point that is
+ * not yet submitted (serial > highest_pending), has waiters, or whose BO is
+ * still busy. Every point before that has its serial recorded in
+ * highest_past and is moved to the head of free_points for reuse.
+ *
+ * Returns VK_SUCCESS, or the error reported by anv_device_bo_busy() if it
+ * returns anything other than VK_SUCCESS / VK_NOT_READY.
+ */
+static VkResult
+anv_timeline_gc_locked(struct anv_device *device,
+ struct anv_timeline *timeline)
+{
+ list_for_each_entry_safe(struct anv_timeline_point, point,
+ &timeline->points, link) {
+ /* timeline->highest_pending is only incremented once submission has
+ * happened. If this point has a greater serial, it means the point
+ * hasn't been submitted yet.
+ */
+ if (point->serial > timeline->highest_pending)
+ return VK_SUCCESS;
+
+ /* If someone is waiting on this time point, consider it busy and don't
+ * try to recycle it. There's a slim possibility that it's no longer
+ * busy by the time we look at it but we would be recycling it out from
+ * under a waiter and that can lead to weird races.
+ *
+ * We walk the list in-order so if this time point is still busy so is
+ * every following time point
+ */
+ assert(point->waiting >= 0);
+ if (point->waiting)
+ return VK_SUCCESS;
+
+ /* Garbage collect any signaled point. */
+ VkResult result = anv_device_bo_busy(device, point->bo);
+ if (result == VK_NOT_READY) {
+ /* We walk the list in-order so if this time point is still busy so
+ * is every following time point
+ */
+ return VK_SUCCESS;
+ } else if (result != VK_SUCCESS) {
+ return result;
+ }
+
+ assert(timeline->highest_past < point->serial);
+ timeline->highest_past = point->serial;
+
+ list_del(&point->link);
+ list_add(&point->link, &timeline->free_points);
+ }
+
+ return VK_SUCCESS;
+}
+
+static VkResult anv_queue_submit_add_fence_bo(struct anv_queue_submit *submit,
+ struct anv_bo *bo,
+ bool signal);
+
+/* Resolves the BO-backed timeline waits/signals of a submit and executes it.
+ *
+ * For each timeline wait whose value is not already covered by
+ * highest_past, the BO of the first point with serial >= the wait value is
+ * added to the submit as a non-signaling fence BO. For each timeline signal
+ * a point is added to the timeline and its BO is added as a signaling fence
+ * BO. The submit is then passed to anv_queue_execbuf_locked().
+ *
+ * On success, highest_pending of every signaled timeline is advanced and the
+ * sync-file backed semaphores receive a duplicate of the out fence. On
+ * execbuf failure, both highest_past and highest_pending are advanced so
+ * that waiters are unblocked.
+ *
+ * Returns the execbuf result, or the first error hit while adding fence BOs
+ * or timeline points (in which case execbuf is not attempted).
+ */
+static VkResult
+anv_queue_submit_timeline_locked(struct anv_queue *queue,
+ struct anv_queue_submit *submit)
+{
+ VkResult result;
+
+ for (uint32_t i = 0; i < submit->wait_timeline_count; i++) {
+ struct anv_timeline *timeline = submit->wait_timelines[i];
+ uint64_t wait_value = submit->wait_timeline_values[i];
+
+ /* Already signaled, nothing to wait on. */
+ if (timeline->highest_past >= wait_value)
+ continue;
+
+ list_for_each_entry(struct anv_timeline_point, point, &timeline->points, link) {
+ if (point->serial < wait_value)
+ continue;
+ result = anv_queue_submit_add_fence_bo(submit, point->bo, false);
+ if (result != VK_SUCCESS)
+ return result;
+ break;
+ }
+ }
+ for (uint32_t i = 0; i < submit->signal_timeline_count; i++) {
+ struct anv_timeline *timeline = submit->signal_timelines[i];
+ uint64_t signal_value = submit->signal_timeline_values[i];
+ struct anv_timeline_point *point;
+
+ result = anv_timeline_add_point_locked(queue->device, timeline,
+ signal_value, &point);
+ if (result != VK_SUCCESS)
+ return result;
+
+ result = anv_queue_submit_add_fence_bo(submit, point->bo, true);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+
+ result = anv_queue_execbuf_locked(queue, submit);
+
+ if (result == VK_SUCCESS) {
+ /* Update the pending values in the timeline objects. */
+ for (uint32_t i = 0; i < submit->signal_timeline_count; i++) {
+ struct anv_timeline *timeline = submit->signal_timelines[i];
+ uint64_t signal_value = submit->signal_timeline_values[i];
+
+ assert(signal_value > timeline->highest_pending);
+ timeline->highest_pending = signal_value;
+ }
+
/* Update signaled semaphores backed by syncfd. */
for (uint32_t i = 0; i < submit->sync_fd_semaphore_count; i++) {
struct anv_semaphore *semaphore = submit->sync_fd_semaphores[i];
struct anv_semaphore_impl *impl = &semaphore->permanent;
assert(impl->type == ANV_SEMAPHORE_TYPE_SYNC_FILE);
- impl->fd = dup(submit->out_fence);
+ impl->fd = os_dupfd_cloexec(submit->out_fence);
+ }
+ } else {
+ /* Unblock any waiter by signaling the points, the application will get
+ * a device lost error code.
+ */
+ for (uint32_t i = 0; i < submit->signal_timeline_count; i++) {
+ struct anv_timeline *timeline = submit->signal_timelines[i];
+ uint64_t signal_value = submit->signal_timeline_values[i];
+
+ assert(signal_value > timeline->highest_pending);
+ timeline->highest_past = timeline->highest_pending = signal_value;
}
}
return result;
}
+/* Submits, in order, the queued submits of a queue that are ready.
+ *
+ * Stops at the first submit that is not ready (see
+ * anv_queue_submit_ready_locked()) or at the first submission error. Every
+ * submit that is attempted is unlinked from the queue and freed, whether or
+ * not it succeeded, and *advance is incremented once per attempted submit.
+ *
+ * Returns VK_SUCCESS or the first error from
+ * anv_queue_submit_timeline_locked().
+ */
+static VkResult
+anv_queue_submit_deferred_locked(struct anv_queue *queue, uint32_t *advance)
+{
+ VkResult result = VK_SUCCESS;
+
+ /* Go through all the queued submissions and submit them until we find one
+ * that's waiting on a point that hasn't materialized yet.
+ */
+ list_for_each_entry_safe(struct anv_queue_submit, submit,
+ &queue->queued_submits, link) {
+ if (!anv_queue_submit_ready_locked(submit))
+ break;
+
+ (*advance)++;
+ list_del(&submit->link);
+
+ result = anv_queue_submit_timeline_locked(queue, submit);
+
+ anv_queue_submit_free(queue->device, submit);
+
+ if (result != VK_SUCCESS)
+ break;
+ }
+
+ return result;
+}
+
+/* Runs the deferred-submission pass on the device's queue (device->queue)
+ * and returns its result. The count of submits that were processed is
+ * collected in a local and discarded.
+ */
+static VkResult
+anv_device_submit_deferred_locked(struct anv_device *device)
+{
+ uint32_t advance = 0;
+ return anv_queue_submit_deferred_locked(&device->queue, &advance);
+}
+
+/* Signals from the CPU every syncobj the submit was supposed to signal.
+ *
+ * For each fence entry flagged I915_EXEC_FENCE_SIGNAL, the syncobj handle
+ * is signaled at the matching fence_values[] entry through
+ * anv_gem_syncobj_timeline_signal(). The return value of that call is
+ * ignored.
+ */
+static void
+anv_queue_submit_signal_fences(struct anv_device *device,
+ struct anv_queue_submit *submit)
+{
+ for (uint32_t i = 0; i < submit->fence_count; i++) {
+ if (submit->fences[i].flags & I915_EXEC_FENCE_SIGNAL) {
+ anv_gem_syncobj_timeline_signal(device, &submit->fences[i].handle,
+ &submit->fence_values[i], 1);
+ }
+ }
+}
+
+/* Entry point of the per-queue submission thread.
+ *
+ * _queue is the struct anv_queue passed to pthread_create(). The thread
+ * pops submits from queue->queued_submits in order and, for each one:
+ *   - waits for the syncobj points it depends on to materialize,
+ *   - executes it with the device mutex held,
+ *   - hands a duplicate of the out fence to sync-file backed semaphores,
+ *   - on failure, signals the submit's fences from the CPU so that waiters
+ *     wake up,
+ *   - frees the submit.
+ *
+ * queue->mutex is held while the list is inspected and released while a
+ * submit is being processed. When the list is empty the thread sleeps on
+ * queue->cond until it is woken up; it exits once queue->quit is set.
+ * Always returns NULL.
+ */
+static void *
+anv_queue_task(void *_queue)
+{
+ struct anv_queue *queue = _queue;
+
+ pthread_mutex_lock(&queue->mutex);
+
+ while (!queue->quit) {
+ while (!list_is_empty(&queue->queued_submits)) {
+ struct anv_queue_submit *submit =
+ list_first_entry(&queue->queued_submits, struct anv_queue_submit, link);
+ list_del(&submit->link);
+
+ pthread_mutex_unlock(&queue->mutex);
+
+ /* Stays DEVICE_LOST if the queue is already lost and the execbuf
+ * below is skipped.
+ */
+ VkResult result = VK_ERROR_DEVICE_LOST;
+
+ /* Wait for timeline points to materialize before submitting. We need
+ * to do this because we're using threads to do the submit to i915.
+ * We could end up in a situation where the application submits to 2
+ * queues with the first submit creating the dma-fence for the
+ * second. But because the scheduling of the submission threads might
+ * wakeup the second queue thread first, this would make that execbuf
+ * fail because the dma-fence it depends on hasn't materialized yet.
+ */
+ if (!queue->lost && submit->wait_timeline_count > 0) {
+ int ret = queue->device->no_hw ? 0 :
+ anv_gem_syncobj_timeline_wait(
+ queue->device, submit->wait_timeline_syncobjs,
+ submit->wait_timeline_values, submit->wait_timeline_count,
+ anv_get_absolute_timeout(UINT64_MAX) /* wait forever */,
+ true /* wait for all */, true /* wait for materialize */);
+ if (ret) {
+ result = anv_queue_set_lost(queue, "timeline timeout: %s",
+ strerror(errno));
+ }
+ }
+
+ /* Now submit */
+ if (!queue->lost) {
+ pthread_mutex_lock(&queue->device->mutex);
+ result = anv_queue_execbuf_locked(queue, submit);
+ pthread_mutex_unlock(&queue->device->mutex);
+ }
+
+ for (uint32_t i = 0; i < submit->sync_fd_semaphore_count; i++) {
+ struct anv_semaphore *semaphore = submit->sync_fd_semaphores[i];
+ /* Out fences can't have temporary state because that would imply
+ * that we imported a sync file and are trying to signal it.
+ */
+ assert(semaphore->temporary.type == ANV_SEMAPHORE_TYPE_NONE);
+ struct anv_semaphore_impl *impl = &semaphore->permanent;
+
+ assert(impl->type == ANV_SEMAPHORE_TYPE_SYNC_FILE);
+ /* Duplicate with close-on-exec, like the non-threaded submission
+ * path does, so the fence fd is not inherited by child processes.
+ */
+ impl->fd = os_dupfd_cloexec(submit->out_fence);
+ }
+
+ if (result != VK_SUCCESS) {
+ /* vkQueueSubmit or some other entry point will report the
+ * DEVICE_LOST error at some point, but until we have emptied our
+ * list of execbufs we need to wake up all the potential waiters
+ * until one of them spots the error.
+ */
+ anv_queue_submit_signal_fences(queue->device, submit);
+ }
+
+ anv_queue_submit_free(queue->device, submit);
+
+ pthread_mutex_lock(&queue->mutex);
+ }
+
+ /* Nothing left to do: sleep until a submit is queued or we are asked
+ * to quit.
+ */
+ if (!queue->quit)
+ pthread_cond_wait(&queue->cond, &queue->mutex);
+ }
+
+ pthread_mutex_unlock(&queue->mutex);
+
+ return NULL;
+}
+
+/* Queues a submit on `queue`, taking ownership of it.
+ *
+ * *_submit is always reset to NULL: from this point on the queue owns the
+ * submit and is responsible for freeing it.
+ *
+ * With a submission thread (device->has_thread_submit), the submit is
+ * appended to queue->queued_submits under queue->mutex, the thread is woken
+ * up and VK_SUCCESS is returned right away.
+ *
+ * Without one, the submit is appended under the device mutex and the
+ * deferred-submission pass is run. If flush_queue is true, the function
+ * then keeps waiting on device->queue_submit and re-running the pass until
+ * the queue is empty or an error occurs; a failing pthread_cond_wait()
+ * marks the device lost. The result of the last pass (or the lost error)
+ * is returned.
+ */
+static VkResult
+_anv_queue_submit(struct anv_queue *queue, struct anv_queue_submit **_submit,
+ bool flush_queue)
+{
+ struct anv_queue_submit *submit = *_submit;
+
+ /* Wait before signal behavior means we might keep alive the
+ * anv_queue_submit object a bit longer, so transfer the ownership to the
+ * anv_queue.
+ */
+ *_submit = NULL;
+ if (queue->device->has_thread_submit) {
+ pthread_mutex_lock(&queue->mutex);
+ /* Broadcasting before the insertion is fine: the submission thread
+ * cannot observe the list until the mutex is released below.
+ */
+ pthread_cond_broadcast(&queue->cond);
+ list_addtail(&submit->link, &queue->queued_submits);
+ pthread_mutex_unlock(&queue->mutex);
+ return VK_SUCCESS;
+ } else {
+ pthread_mutex_lock(&queue->device->mutex);
+ list_addtail(&submit->link, &queue->queued_submits);
+ VkResult result = anv_device_submit_deferred_locked(queue->device);
+ if (flush_queue) {
+ while (result == VK_SUCCESS && !list_is_empty(&queue->queued_submits)) {
+ int ret = pthread_cond_wait(&queue->device->queue_submit,
+ &queue->device->mutex);
+ if (ret != 0) {
+ result = anv_device_set_lost(queue->device, "wait timeout");
+ break;
+ }
+
+ result = anv_device_submit_deferred_locked(queue->device);
+ }
+ }
+ pthread_mutex_unlock(&queue->device->mutex);
+ return result;
+ }
+}
+
+/* Initializes a queue for `device`.
+ *
+ * Resets the flags, lost and quit state and the list of queued submits.
+ * When the device uses a submission thread, also creates the queue mutex,
+ * the condition variable and the thread running anv_queue_task().
+ *
+ * Returns VK_SUCCESS, or VK_ERROR_INITIALIZATION_FAILED if any of the
+ * pthread objects cannot be created; whatever was created before the
+ * failure is destroyed again.
+ */
VkResult
anv_queue_init(struct anv_device *device, struct anv_queue *queue)
{
- queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
+ VkResult result;
+
queue->device = device;
queue->flags = 0;
+ queue->lost = false;
+ queue->quit = false;
+
+ list_inithead(&queue->queued_submits);
+
+ /* We only need those additional thread/mutex when using a thread for
+ * submission.
+ */
+ if (device->has_thread_submit) {
+ if (pthread_mutex_init(&queue->mutex, NULL) != 0)
+ return vk_error(VK_ERROR_INITIALIZATION_FAILED);
+
+ if (pthread_cond_init(&queue->cond, NULL) != 0) {
+ result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
+ goto fail_mutex;
+ }
+ if (pthread_create(&queue->thread, NULL, anv_queue_task, queue)) {
+ result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
+ goto fail_cond;
+ }
+ }
+
+ vk_object_base_init(&device->vk, &queue->base, VK_OBJECT_TYPE_QUEUE);
return VK_SUCCESS;
+
+ /* Error unwinding, only reachable from the has_thread_submit branch. */
+ fail_cond:
+ pthread_cond_destroy(&queue->cond);
+ fail_mutex:
+ pthread_mutex_destroy(&queue->mutex);
+
+ return result;
}
+/* Tears down a queue set up by anv_queue_init().
+ *
+ * Finishes the base object. When the device uses a submission thread, it
+ * also sets queue->quit under the queue mutex, wakes the thread up, joins
+ * it and destroys the condition variable and the mutex.
+ */
void
anv_queue_finish(struct anv_queue *queue)
{
+ vk_object_base_finish(&queue->base);
+
+ /* No thread, mutex or condition variable was created in that case. */
+ if (!queue->device->has_thread_submit)
+ return;
+
+ pthread_mutex_lock(&queue->mutex);
+ pthread_cond_broadcast(&queue->cond);
+ queue->quit = true;
+ pthread_mutex_unlock(&queue->mutex);
+
+ void *ret;
+ pthread_join(queue->thread, &ret);
+
+ pthread_cond_destroy(&queue->cond);
+ pthread_mutex_destroy(&queue->mutex);
}
static VkResult
{
if (submit->fence_bo_count >= submit->fence_bo_array_length) {
uint32_t new_len = MAX2(submit->fence_bo_array_length * 2, 64);
-
- submit->fence_bos =
+ uintptr_t *new_fence_bos =
vk_realloc(submit->alloc,
submit->fence_bos, new_len * sizeof(*submit->fence_bos),
8, submit->alloc_scope);
- if (submit->fence_bos == NULL)
+ if (new_fence_bos == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ submit->fence_bos = new_fence_bos;
submit->fence_bo_array_length = new_len;
}
+/* Appends a syncobj fence (handle, flags, value) to a submit.
+ *
+ * flags must be non-zero. When the device uses a submission thread and the
+ * fence is a wait (I915_EXEC_FENCE_WAIT), the handle and value are also
+ * recorded in wait_timeline_syncobjs / wait_timeline_values. The entry is
+ * then appended to fences / fence_values.
+ *
+ * All arrays grow by doubling, with a minimum of 64 entries. Each realloc
+ * result goes through a temporary so the previous array stays valid (and
+ * owned by the submit) on failure.
+ *
+ * Returns VK_SUCCESS or VK_ERROR_OUT_OF_HOST_MEMORY.
+ */
static VkResult
anv_queue_submit_add_syncobj(struct anv_queue_submit* submit,
struct anv_device *device,
- uint32_t handle, uint32_t flags)
+ uint32_t handle, uint32_t flags,
+ uint64_t value)
{
assert(flags != 0);
+ if (device->has_thread_submit && (flags & I915_EXEC_FENCE_WAIT)) {
+ if (submit->wait_timeline_count >= submit->wait_timeline_array_length) {
+ uint32_t new_len = MAX2(submit->wait_timeline_array_length * 2, 64);
+
+ uint32_t *new_wait_timeline_syncobjs =
+ vk_realloc(submit->alloc,
+ submit->wait_timeline_syncobjs,
+ new_len * sizeof(*submit->wait_timeline_syncobjs),
+ 8, submit->alloc_scope);
+ if (new_wait_timeline_syncobjs == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ submit->wait_timeline_syncobjs = new_wait_timeline_syncobjs;
+
+ uint64_t *new_wait_timeline_values =
+ vk_realloc(submit->alloc,
+ submit->wait_timeline_values, new_len * sizeof(*submit->wait_timeline_values),
+ 8, submit->alloc_scope);
+ if (new_wait_timeline_values == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ submit->wait_timeline_values = new_wait_timeline_values;
+ /* Only publish the new capacity once both arrays have grown. */
+ submit->wait_timeline_array_length = new_len;
+ }
+
+ submit->wait_timeline_syncobjs[submit->wait_timeline_count] = handle;
+ submit->wait_timeline_values[submit->wait_timeline_count] = value;
+
+ submit->wait_timeline_count++;
+ }
+
if (submit->fence_count >= submit->fence_array_length) {
uint32_t new_len = MAX2(submit->fence_array_length * 2, 64);
-
- submit->fences =
+ struct drm_i915_gem_exec_fence *new_fences =
vk_realloc(submit->alloc,
submit->fences, new_len * sizeof(*submit->fences),
8, submit->alloc_scope);
- if (submit->fences == NULL)
+ if (new_fences == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ submit->fences = new_fences;
+
+ uint64_t *new_fence_values =
+ vk_realloc(submit->alloc,
+ submit->fence_values, new_len * sizeof(*submit->fence_values),
+ 8, submit->alloc_scope);
+ if (new_fence_values == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ submit->fence_values = new_fence_values;
submit->fence_array_length = new_len;
}
- submit->fences[submit->fence_count++] = (struct drm_i915_gem_exec_fence) {
+ submit->fences[submit->fence_count] = (struct drm_i915_gem_exec_fence) {
.handle = handle,
.flags = flags,
};
+ submit->fence_values[submit->fence_count] = value;
+ submit->fence_count++;
return VK_SUCCESS;
}
return VK_SUCCESS;
}
+/* Records that the submit must wait for `timeline` to reach `value`.
+ *
+ * The (timeline, value) pair is appended to wait_timelines /
+ * wait_timeline_values. Both arrays grow by doubling, with a minimum of 64
+ * entries. The device argument is not used here.
+ *
+ * Returns VK_SUCCESS or VK_ERROR_OUT_OF_HOST_MEMORY; on failure the
+ * existing arrays are left valid and nothing is appended.
+ */
+static VkResult
+anv_queue_submit_add_timeline_wait(struct anv_queue_submit* submit,
+ struct anv_device *device,
+ struct anv_timeline *timeline,
+ uint64_t value)
+{
+ if (submit->wait_timeline_count >= submit->wait_timeline_array_length) {
+ uint32_t new_len = MAX2(submit->wait_timeline_array_length * 2, 64);
+ struct anv_timeline **new_wait_timelines =
+ vk_realloc(submit->alloc,
+ submit->wait_timelines, new_len * sizeof(*submit->wait_timelines),
+ 8, submit->alloc_scope);
+ if (new_wait_timelines == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ submit->wait_timelines = new_wait_timelines;
+
+ uint64_t *new_wait_timeline_values =
+ vk_realloc(submit->alloc,
+ submit->wait_timeline_values, new_len * sizeof(*submit->wait_timeline_values),
+ 8, submit->alloc_scope);
+ if (new_wait_timeline_values == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ submit->wait_timeline_values = new_wait_timeline_values;
+
+ /* Only publish the new capacity once both arrays have grown. */
+ submit->wait_timeline_array_length = new_len;
+ }
+
+ submit->wait_timelines[submit->wait_timeline_count] = timeline;
+ submit->wait_timeline_values[submit->wait_timeline_count] = value;
+
+ submit->wait_timeline_count++;
+
+ return VK_SUCCESS;
+}
+
+/* Records that the submit signals `timeline` with `value`.
+ *
+ * `value` must be greater than the timeline's current highest_pending
+ * (asserted). The (timeline, value) pair is appended to signal_timelines /
+ * signal_timeline_values. Both arrays grow by doubling, with a minimum of
+ * 64 entries. The device argument is not used here.
+ *
+ * Returns VK_SUCCESS or VK_ERROR_OUT_OF_HOST_MEMORY; on failure the
+ * existing arrays are left valid and nothing is appended.
+ */
+static VkResult
+anv_queue_submit_add_timeline_signal(struct anv_queue_submit* submit,
+ struct anv_device *device,
+ struct anv_timeline *timeline,
+ uint64_t value)
+{
+ assert(timeline->highest_pending < value);
+
+ if (submit->signal_timeline_count >= submit->signal_timeline_array_length) {
+ uint32_t new_len = MAX2(submit->signal_timeline_array_length * 2, 64);
+ struct anv_timeline **new_signal_timelines =
+ vk_realloc(submit->alloc,
+ submit->signal_timelines, new_len * sizeof(*submit->signal_timelines),
+ 8, submit->alloc_scope);
+ if (new_signal_timelines == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ submit->signal_timelines = new_signal_timelines;
+
+ uint64_t *new_signal_timeline_values =
+ vk_realloc(submit->alloc,
+ submit->signal_timeline_values, new_len * sizeof(*submit->signal_timeline_values),
+ 8, submit->alloc_scope);
+ if (new_signal_timeline_values == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ submit->signal_timeline_values = new_signal_timeline_values;
+
+ /* Only publish the new capacity once both arrays have grown. */
+ submit->signal_timeline_array_length = new_len;
+ }
+
+ submit->signal_timelines[submit->signal_timeline_count] = timeline;
+ submit->signal_timeline_values[submit->signal_timeline_count] = value;
+
+ submit->signal_timeline_count++;
+
+ return VK_SUCCESS;
+}
+
static struct anv_queue_submit *
-anv_queue_submit_alloc(struct anv_device *device)
+anv_queue_submit_alloc(struct anv_device *device, int perf_query_pass)
{
- const VkAllocationCallbacks *alloc = &device->alloc;
- VkSystemAllocationScope alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_COMMAND;
+ const VkAllocationCallbacks *alloc = &device->vk.alloc;
+ VkSystemAllocationScope alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE;
struct anv_queue_submit *submit = vk_zalloc(alloc, sizeof(*submit), 8, alloc_scope);
if (!submit)
submit->alloc_scope = alloc_scope;
submit->in_fence = -1;
submit->out_fence = -1;
+ submit->perf_query_pass = perf_query_pass;
return submit;
}
anv_queue_submit_simple_batch(struct anv_queue *queue,
struct anv_batch *batch)
{
+ if (queue->device->no_hw)
+ return VK_SUCCESS;
+
struct anv_device *device = queue->device;
- struct anv_queue_submit *submit = anv_queue_submit_alloc(device);
+ struct anv_queue_submit *submit = anv_queue_submit_alloc(device, -1);
if (!submit)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
- bool has_syncobj_wait = device->instance->physicalDevice.has_syncobj_wait;
+ bool has_syncobj_wait = device->physical->has_syncobj_wait;
VkResult result;
uint32_t syncobj;
struct anv_bo *batch_bo, *sync_bo;
}
result = anv_queue_submit_add_syncobj(submit, device, syncobj,
- I915_EXEC_FENCE_SIGNAL);
+ I915_EXEC_FENCE_SIGNAL, 0);
} else {
result = anv_device_alloc_bo(device, 4096,
ANV_BO_ALLOC_EXTERNAL |
ANV_BO_ALLOC_IMPLICIT_SYNC,
+ 0 /* explicit_address */,
&sync_bo);
if (result != VK_SUCCESS)
goto err_free_submit;
submit->simple_bo_size = size;
}
- result = _anv_queue_submit(queue, &submit);
+ result = _anv_queue_submit(queue, &submit, true);
if (result == VK_SUCCESS) {
if (has_syncobj_wait) {
return VK_SUCCESS;
}
+ /* BO backed timeline semaphores cannot be temporary. */
+ assert(impl->type != ANV_SEMAPHORE_TYPE_TIMELINE);
+
/*
* There is a requirement to reset semaphore to their permanent state after
* submission. From the Vulkan 1.0.53 spec:
anv_queue_submit(struct anv_queue *queue,
struct anv_cmd_buffer *cmd_buffer,
const VkSemaphore *in_semaphores,
+ const uint64_t *in_values,
uint32_t num_in_semaphores,
const VkSemaphore *out_semaphores,
+ const uint64_t *out_values,
uint32_t num_out_semaphores,
- VkFence _fence)
+ struct anv_bo *wsi_signal_bo,
+ VkFence _fence,
+ int perf_query_pass)
{
ANV_FROM_HANDLE(anv_fence, fence, _fence);
struct anv_device *device = queue->device;
- UNUSED struct anv_physical_device *pdevice = &device->instance->physicalDevice;
- struct anv_queue_submit *submit = anv_queue_submit_alloc(device);
+ UNUSED struct anv_physical_device *pdevice = device->physical;
+ struct anv_queue_submit *submit = anv_queue_submit_alloc(device, perf_query_pass);
if (!submit)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
submit->cmd_buffer = cmd_buffer;
VkResult result = VK_SUCCESS;
-
for (uint32_t i = 0; i < num_in_semaphores; i++) {
ANV_FROM_HANDLE(anv_semaphore, semaphore, in_semaphores[i]);
struct anv_semaphore_impl *impl;
goto error;
break;
+ case ANV_SEMAPHORE_TYPE_WSI_BO:
+ /* When using a window-system buffer as a semaphore, always enable
+ * EXEC_OBJECT_WRITE. This gives us a WaR hazard with the display or
+ * compositor's read of the buffer and enforces that we don't start
+ * rendering until they are finished. This is exactly the
+ * synchronization we want with vkAcquireNextImage.
+ */
+ result = anv_queue_submit_add_fence_bo(submit, impl->bo, true /* signal */);
+ if (result != VK_SUCCESS)
+ goto error;
+ break;
+
case ANV_SEMAPHORE_TYPE_SYNC_FILE:
assert(!pdevice->has_syncobj);
if (submit->in_fence == -1) {
case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ: {
result = anv_queue_submit_add_syncobj(submit, device,
impl->syncobj,
- I915_EXEC_FENCE_WAIT);
+ I915_EXEC_FENCE_WAIT,
+ 0);
if (result != VK_SUCCESS)
goto error;
break;
}
+ case ANV_SEMAPHORE_TYPE_TIMELINE:
+ result = anv_queue_submit_add_timeline_wait(submit, device,
+ &impl->timeline,
+ in_values ? in_values[i] : 0);
+ if (result != VK_SUCCESS)
+ goto error;
+ break;
+
+ case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
+ result = anv_queue_submit_add_syncobj(submit, device,
+ impl->syncobj,
+ I915_EXEC_FENCE_WAIT,
+ in_values ? in_values[i] : 0);
+ if (result != VK_SUCCESS)
+ goto error;
+ break;
+
default:
break;
}
case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ: {
result = anv_queue_submit_add_syncobj(submit, device, impl->syncobj,
- I915_EXEC_FENCE_SIGNAL);
+ I915_EXEC_FENCE_SIGNAL,
+ 0);
if (result != VK_SUCCESS)
goto error;
break;
}
+ case ANV_SEMAPHORE_TYPE_TIMELINE:
+ result = anv_queue_submit_add_timeline_signal(submit, device,
+ &impl->timeline,
+ out_values ? out_values[i] : 0);
+ if (result != VK_SUCCESS)
+ goto error;
+ break;
+
+ case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
+ result = anv_queue_submit_add_syncobj(submit, device, impl->syncobj,
+ I915_EXEC_FENCE_SIGNAL,
+ out_values ? out_values[i] : 0);
+ if (result != VK_SUCCESS)
+ goto error;
+ break;
+
default:
break;
}
}
+ if (wsi_signal_bo) {
+ result = anv_queue_submit_add_fence_bo(submit, wsi_signal_bo, true /* signal */);
+ if (result != VK_SUCCESS)
+ goto error;
+ }
+
if (fence) {
/* Under most circumstances, out fences won't be temporary. However,
* the spec does allow it for opaque_fd. From the Vulkan 1.0.53 spec:
switch (impl->type) {
case ANV_FENCE_TYPE_BO:
+ assert(!device->has_thread_submit);
result = anv_queue_submit_add_fence_bo(submit, impl->bo.bo, true /* signal */);
if (result != VK_SUCCESS)
goto error;
* also reset the fence's syncobj so that they don't contain a
* signaled dma-fence.
*/
+ anv_gem_syncobj_reset(device, impl->syncobj);
+
result = anv_queue_submit_add_syncobj(submit, device, impl->syncobj,
- I915_EXEC_FENCE_SIGNAL);
+ I915_EXEC_FENCE_SIGNAL,
+ 0);
if (result != VK_SUCCESS)
goto error;
break;
}
}
- result = _anv_queue_submit(queue, &submit);
+ result = _anv_queue_submit(queue, &submit, false);
if (result != VK_SUCCESS)
goto error;
if (fence && fence->permanent.type == ANV_FENCE_TYPE_BO) {
- /* BO fences can't be shared, so they can't be temporary. */
+ assert(!device->has_thread_submit);
+ /* If we have permanent BO fence, the only type of temporary possible
+ * would be BO_WSI (because BO fences are not shareable). The Vulkan spec
+ * also requires that the fence passed to vkQueueSubmit() be :
+ *
+ * * unsignaled
+ * * not be associated with any other queue command that has not yet
+ * completed execution on that queue
+ *
+ * So the only acceptable type for the temporary is NONE.
+ */
assert(fence->temporary.type == ANV_FENCE_TYPE_NONE);
/* Once the execbuf has returned, we need to set the fence state to
{
ANV_FROM_HANDLE(anv_queue, queue, _queue);
+ if (queue->device->no_hw)
+ return VK_SUCCESS;
+
/* Query for device status prior to submitting. Technically, we don't need
* to do this. However, if we have a client that's submitting piles of
* garbage, we would rather break as early as possible to keep the GPU
* come up with something more efficient but this shouldn't be a
* common case.
*/
- result = anv_queue_submit(queue, NULL, NULL, 0, NULL, 0, fence);
+ result = anv_queue_submit(queue, NULL, NULL, NULL, 0, NULL, NULL, 0,
+ NULL, fence, -1);
goto out;
}
/* Fence for this submit. NULL for all but the last one */
VkFence submit_fence = (i == submitCount - 1) ? fence : VK_NULL_HANDLE;
+ const struct wsi_memory_signal_submit_info *mem_signal_info =
+ vk_find_struct_const(pSubmits[i].pNext,
+ WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA);
+ struct anv_bo *wsi_signal_bo =
+ mem_signal_info && mem_signal_info->memory != VK_NULL_HANDLE ?
+ anv_device_memory_from_handle(mem_signal_info->memory)->bo : NULL;
+
+ const VkTimelineSemaphoreSubmitInfoKHR *timeline_info =
+ vk_find_struct_const(pSubmits[i].pNext,
+ TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR);
+ const VkPerformanceQuerySubmitInfoKHR *perf_info =
+ vk_find_struct_const(pSubmits[i].pNext,
+ PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
+ const uint64_t *wait_values =
+ timeline_info && timeline_info->waitSemaphoreValueCount ?
+ timeline_info->pWaitSemaphoreValues : NULL;
+ const uint64_t *signal_values =
+ timeline_info && timeline_info->signalSemaphoreValueCount ?
+ timeline_info->pSignalSemaphoreValues : NULL;
+
if (pSubmits[i].commandBufferCount == 0) {
/* If we don't have any command buffers, we need to submit a dummy
* batch to give GEM something to wait on. We could, potentially,
*/
result = anv_queue_submit(queue, NULL,
pSubmits[i].pWaitSemaphores,
+ wait_values,
pSubmits[i].waitSemaphoreCount,
pSubmits[i].pSignalSemaphores,
+ signal_values,
pSubmits[i].signalSemaphoreCount,
- submit_fence);
+ wsi_signal_bo,
+ submit_fence,
+ -1);
if (result != VK_SUCCESS)
goto out;
submit_fence : VK_NULL_HANDLE;
const VkSemaphore *in_semaphores = NULL, *out_semaphores = NULL;
+ const uint64_t *in_values = NULL, *out_values = NULL;
uint32_t num_in_semaphores = 0, num_out_semaphores = 0;
if (j == 0) {
/* Only the first batch gets the in semaphores */
in_semaphores = pSubmits[i].pWaitSemaphores;
+ in_values = wait_values;
num_in_semaphores = pSubmits[i].waitSemaphoreCount;
}
if (j == pSubmits[i].commandBufferCount - 1) {
/* Only the last batch gets the out semaphores */
out_semaphores = pSubmits[i].pSignalSemaphores;
+ out_values = signal_values;
num_out_semaphores = pSubmits[i].signalSemaphoreCount;
}
result = anv_queue_submit(queue, cmd_buffer,
- in_semaphores, num_in_semaphores,
- out_semaphores, num_out_semaphores,
- execbuf_fence);
+ in_semaphores, in_values, num_in_semaphores,
+ out_semaphores, out_values, num_out_semaphores,
+ wsi_signal_bo, execbuf_fence,
+ perf_info ? perf_info->counterPassIndex : 0);
if (result != VK_SUCCESS)
goto out;
}
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);
- fence = vk_zalloc2(&device->alloc, pAllocator, sizeof(*fence), 8,
+ fence = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*fence), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (fence == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
- if (device->instance->physicalDevice.has_syncobj_wait) {
+ vk_object_base_init(&device->vk, &fence->base, VK_OBJECT_TYPE_FENCE);
+
+ if (device->physical->has_syncobj_wait) {
fence->permanent.type = ANV_FENCE_TYPE_SYNCOBJ;
uint32_t create_flags = 0;
anv_bo_pool_free(&device->batch_bo_pool, impl->bo.bo);
break;
+ case ANV_FENCE_TYPE_WSI_BO:
+ anv_device_release_bo(device, impl->bo.bo);
+ break;
+
case ANV_FENCE_TYPE_SYNCOBJ:
anv_gem_syncobj_destroy(device, impl->syncobj);
break;
impl->type = ANV_FENCE_TYPE_NONE;
}
+/* Drops the temporary payload of a fence, if it has one.
+ *
+ * Does nothing when the temporary implementation is of type
+ * ANV_FENCE_TYPE_NONE; otherwise hands it to anv_fence_impl_cleanup().
+ * The permanent payload is not touched.
+ */
+void
+anv_fence_reset_temporary(struct anv_device *device,
+ struct anv_fence *fence)
+{
+ if (fence->temporary.type == ANV_FENCE_TYPE_NONE)
+ return;
+
+ anv_fence_impl_cleanup(device, &fence->temporary);
+}
+
void anv_DestroyFence(
VkDevice _device,
VkFence _fence,
anv_fence_impl_cleanup(device, &fence->temporary);
anv_fence_impl_cleanup(device, &fence->permanent);
- vk_free2(&device->alloc, pAllocator, fence);
+ vk_object_base_finish(&fence->base);
+ vk_free2(&device->vk.alloc, pAllocator, fence);
}
VkResult anv_ResetFences(
* first restored. The remaining operations described therefore
* operate on the restored payload.
*/
- if (fence->temporary.type != ANV_FENCE_TYPE_NONE)
- anv_fence_impl_cleanup(device, &fence->temporary);
+ anv_fence_reset_temporary(device, fence);
struct anv_fence_impl *impl = &fence->permanent;
switch (impl->type) {
case ANV_FENCE_TYPE_BO:
- /* BO fences don't support import/export */
- assert(fence->temporary.type == ANV_FENCE_TYPE_NONE);
+ case ANV_FENCE_TYPE_WSI_BO:
switch (impl->bo.state) {
case ANV_BO_FENCE_STATE_RESET:
/* If it hasn't even been sent off to the GPU yet, it's not ready */
}
case ANV_FENCE_TYPE_SYNCOBJ: {
- int ret = anv_gem_syncobj_wait(device, &impl->syncobj, 1, 0, true);
- if (ret == -1) {
- if (errno == ETIME) {
- return VK_NOT_READY;
+ if (device->has_thread_submit) {
+ uint64_t binary_value = 0;
+ int ret = anv_gem_syncobj_timeline_wait(device, &impl->syncobj,
+ &binary_value, 1, 0,
+ true /* wait_all */,
+ false /* wait_materialize */);
+ if (ret == -1) {
+ if (errno == ETIME) {
+ return VK_NOT_READY;
+ } else {
+ /* We don't know the real error. */
+ return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
+ }
} else {
- /* We don't know the real error. */
- return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
+ return VK_SUCCESS;
}
} else {
- return VK_SUCCESS;
+ int ret = anv_gem_syncobj_wait(device, &impl->syncobj, 1, 0, false);
+ if (ret == -1) {
+ if (errno == ETIME) {
+ return VK_NOT_READY;
+ } else {
+ /* We don't know the real error. */
+ return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
+ }
+ } else {
+ return VK_SUCCESS;
+ }
}
}
bool waitAll,
uint64_t abs_timeout_ns)
{
- uint32_t *syncobjs = vk_zalloc(&device->alloc,
+ uint32_t *syncobjs = vk_zalloc(&device->vk.alloc,
sizeof(*syncobjs) * fenceCount, 8,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!syncobjs)
syncobjs[i] = impl->syncobj;
}
+ int ret = 0;
/* The gem_syncobj_wait ioctl may return early due to an inherent
- * limitation in the way it computes timeouts. Loop until we've actually
+ * limitation in the way it computes timeouts. Loop until we've actually
* passed the timeout.
*/
- int ret;
do {
ret = anv_gem_syncobj_wait(device, syncobjs, fenceCount,
abs_timeout_ns, waitAll);
} while (ret == -1 && errno == ETIME && anv_gettime_ns() < abs_timeout_ns);
- vk_free(&device->alloc, syncobjs);
+ vk_free(&device->vk.alloc, syncobjs);
if (ret == -1) {
if (errno == ETIME) {
for (uint32_t i = 0; i < fenceCount; i++) {
ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
- /* This function assumes that all fences are BO fences and that they
- * have no temporary state. Since BO fences will never be exported,
- * this should be a safe assumption.
- */
- assert(fence->permanent.type == ANV_FENCE_TYPE_BO);
- assert(fence->temporary.type == ANV_FENCE_TYPE_NONE);
- struct anv_fence_impl *impl = &fence->permanent;
+ struct anv_fence_impl *impl =
+ fence->temporary.type != ANV_FENCE_TYPE_NONE ?
+ &fence->temporary : &fence->permanent;
+ assert(impl->type == ANV_FENCE_TYPE_BO ||
+ impl->type == ANV_FENCE_TYPE_WSI_BO);
switch (impl->bo.state) {
case ANV_BO_FENCE_STATE_RESET:
static VkResult
anv_wait_for_wsi_fence(struct anv_device *device,
- const VkFence _fence,
+ struct anv_fence_impl *impl,
uint64_t abs_timeout)
{
- ANV_FROM_HANDLE(anv_fence, fence, _fence);
- struct anv_fence_impl *impl = &fence->permanent;
-
return impl->fence_wsi->wait(impl->fence_wsi, abs_timeout);
}
if (fenceCount <= 1 || waitAll) {
for (uint32_t i = 0; i < fenceCount; i++) {
ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
- switch (fence->permanent.type) {
+ struct anv_fence_impl *impl =
+ fence->temporary.type != ANV_FENCE_TYPE_NONE ?
+ &fence->temporary : &fence->permanent;
+
+ switch (impl->type) {
case ANV_FENCE_TYPE_BO:
+ assert(!device->physical->has_syncobj_wait);
+ /* fall-through */
+ case ANV_FENCE_TYPE_WSI_BO:
result = anv_wait_for_bo_fences(device, 1, &pFences[i],
true, abs_timeout);
break;
true, abs_timeout);
break;
case ANV_FENCE_TYPE_WSI:
- result = anv_wait_for_wsi_fence(device, pFences[i], abs_timeout);
+ result = anv_wait_for_wsi_fence(device, impl, abs_timeout);
break;
case ANV_FENCE_TYPE_NONE:
result = VK_SUCCESS;
{
for (uint32_t i = 0; i < fenceCount; ++i) {
ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
- if (fence->permanent.type != ANV_FENCE_TYPE_SYNCOBJ)
+ struct anv_fence_impl *impl =
+ fence->temporary.type != ANV_FENCE_TYPE_NONE ?
+ &fence->temporary : &fence->permanent;
+ if (impl->type != ANV_FENCE_TYPE_SYNCOBJ)
return false;
}
return true;
{
for (uint32_t i = 0; i < fenceCount; ++i) {
ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
- if (fence->permanent.type != ANV_FENCE_TYPE_BO)
+ struct anv_fence_impl *impl =
+ fence->temporary.type != ANV_FENCE_TYPE_NONE ?
+ &fence->temporary : &fence->permanent;
+ if (impl->type != ANV_FENCE_TYPE_BO &&
+ impl->type != ANV_FENCE_TYPE_WSI_BO)
return false;
}
return true;
{
ANV_FROM_HANDLE(anv_device, device, _device);
+ if (device->no_hw)
+ return VK_SUCCESS;
+
if (anv_device_is_lost(device))
return VK_ERROR_DEVICE_LOST;
break;
- case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
+ case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: {
/* Sync files are a bit tricky. Because we want to continue using the
* syncobj implementation of WaitForFences, we don't use the sync file
* directly but instead import it into a syncobj.
*/
new_impl.type = ANV_FENCE_TYPE_SYNCOBJ;
- new_impl.syncobj = anv_gem_syncobj_create(device, 0);
+ /* "If handleType is VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT, the
+ * special value -1 for fd is treated like a valid sync file descriptor
+ * referring to an object that has already signaled. The import
+ * operation will succeed and the VkFence will have a temporarily
+ * imported payload as if a valid file descriptor had been provided."
+ */
+ uint32_t create_flags = 0;
+ if (fd == -1)
+ create_flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
+
+ new_impl.syncobj = anv_gem_syncobj_create(device, create_flags);
if (!new_impl.syncobj)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
- if (anv_gem_syncobj_import_sync_file(device, new_impl.syncobj, fd)) {
+ if (fd != -1 &&
+ anv_gem_syncobj_import_sync_file(device, new_impl.syncobj, fd)) {
anv_gem_syncobj_destroy(device, new_impl.syncobj);
- return vk_errorf(device->instance, NULL,
- VK_ERROR_INVALID_EXTERNAL_HANDLE,
+ return vk_errorf(device, NULL, VK_ERROR_INVALID_EXTERNAL_HANDLE,
"syncobj sync file import failed: %m");
}
break;
+ }
default:
return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
return VK_SUCCESS;
}
+/* The sideband payload of the DRM syncobj was incremented when the
+ * application called vkQueueSubmit(). Here we wait for a fence with the same
+ * value to materialize so that we can export it (typically as a SyncFD).
+ */
+static VkResult
+wait_syncobj_materialize(struct anv_device *device,
+                         uint32_t syncobj,
+                         int *fd)
+{
+   /* Only submission through a thread can leave the syncobj without a
+    * fence at the time of export; otherwise there is nothing to wait for.
+    * Note that `fd` is accepted but never read or written here.
+    */
+   if (device->has_thread_submit) {
+      /* Point 0 is used to address the binary payload of the syncobj. */
+      uint64_t point = 0;
+
+      /* Give the fence up to 5 seconds to show up. Any failure of the wait
+       * (including running out of time) marks the device as lost.
+       */
+      int ret =
+         anv_gem_syncobj_timeline_wait(device, &syncobj, &point, 1,
+                                       anv_get_absolute_timeout(5ull * NSEC_PER_SEC),
+                                       true /* wait_all */,
+                                       true /* wait_materialize */);
+      if (ret != 0)
+         return anv_device_set_lost(device, "anv_gem_syncobj_timeline_wait failed: %m");
+   }
+
+   return VK_SUCCESS;
+}
+
VkResult anv_GetFenceFdKHR(
VkDevice _device,
const VkFenceGetFdInfoKHR* pGetFdInfo,
}
case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: {
+ VkResult result = wait_syncobj_materialize(device, impl->syncobj, pFd);
+ if (result != VK_SUCCESS)
+ return result;
+
int fd = anv_gem_syncobj_export_sync_file(device, impl->syncobj);
if (fd < 0)
return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
// Queue semaphore functions
+/* Look up the semaphore type requested through a pNext chain.
+ *
+ * When the chain carries a VkSemaphoreTypeCreateInfoKHR, its semaphoreType
+ * is returned and, if `initial_value` is non-NULL, its initialValue is
+ * stored there. Without that structure the semaphore is binary and
+ * `*initial_value` is left untouched.
+ */
+static VkSemaphoreTypeKHR
+get_semaphore_type(const void *pNext, uint64_t *initial_value)
+{
+   const VkSemaphoreTypeCreateInfoKHR *info =
+      vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO_KHR);
+
+   if (info != NULL) {
+      if (initial_value != NULL)
+         *initial_value = info->initialValue;
+
+      return info->semaphoreType;
+   }
+
+   return VK_SEMAPHORE_TYPE_BINARY_KHR;
+}
+
+
+/* Initialize `impl` as the payload of a binary semaphore.
+ *
+ * A DRM syncobj is used when the physical device reports has_syncobj;
+ * otherwise the payload is a 4096-byte BO allocated with the EXTERNAL and
+ * IMPLICIT_SYNC flags. `exportable` is currently not consulted: both paths
+ * build the same payload whatever its value.
+ *
+ * Returns VK_SUCCESS, VK_ERROR_OUT_OF_HOST_MEMORY when the syncobj cannot
+ * be created, or the error reported by anv_device_alloc_bo().
+ */
+static VkResult
+binary_semaphore_create(struct anv_device *device,
+                        struct anv_semaphore_impl *impl,
+                        bool exportable)
+{
+   if (device->physical->has_syncobj) {
+      impl->type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
+      impl->syncobj = anv_gem_syncobj_create(device, 0);
+      if (!impl->syncobj)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      return VK_SUCCESS;
+   }
+
+   impl->type = ANV_SEMAPHORE_TYPE_BO;
+   VkResult result =
+      anv_device_alloc_bo(device, 4096,
+                          ANV_BO_ALLOC_EXTERNAL |
+                          ANV_BO_ALLOC_IMPLICIT_SYNC,
+                          0 /* explicit_address */,
+                          &impl->bo);
+   /* impl->bo is only meaningful once the allocation has succeeded, so bail
+    * out before looking at it.
+    */
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* If we're going to use this as a fence, we need to *not* have the
+    * EXEC_OBJECT_ASYNC bit set.
+    */
+   assert(!(impl->bo->flags & EXEC_OBJECT_ASYNC));
+
+   return VK_SUCCESS;
+}
+
+/* Initialize `impl` as the payload of a timeline semaphore starting at
+ * `initial_value`.
+ *
+ * With threaded submission the payload is a DRM syncobj, signaled up to
+ * `initial_value` when that value is non-zero. Otherwise it is a timeline
+ * set up by anv_timeline_init().
+ */
+static VkResult
+timeline_semaphore_create(struct anv_device *device,
+                          struct anv_semaphore_impl *impl,
+                          uint64_t initial_value)
+{
+   if (!device->has_thread_submit) {
+      impl->type = ANV_SEMAPHORE_TYPE_TIMELINE;
+      anv_timeline_init(device, &impl->timeline, initial_value);
+      return VK_SUCCESS;
+   }
+
+   impl->type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE;
+   impl->syncobj = anv_gem_syncobj_create(device, 0);
+   if (!impl->syncobj)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* A fresh syncobj needs no signal operation when the start value is 0. */
+   if (initial_value != 0 &&
+       anv_gem_syncobj_timeline_signal(device, &impl->syncobj,
+                                       &initial_value, 1)) {
+      anv_gem_syncobj_destroy(device, impl->syncobj);
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   return VK_SUCCESS;
+}
+
VkResult anv_CreateSemaphore(
VkDevice _device,
const VkSemaphoreCreateInfo* pCreateInfo,
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO);
- semaphore = vk_alloc(&device->alloc, sizeof(*semaphore), 8,
+ uint64_t timeline_value = 0;
+ VkSemaphoreTypeKHR sem_type = get_semaphore_type(pCreateInfo->pNext, &timeline_value);
+
+ semaphore = vk_alloc(&device->vk.alloc, sizeof(*semaphore), 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (semaphore == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ vk_object_base_init(&device->vk, &semaphore->base, VK_OBJECT_TYPE_SEMAPHORE);
+
p_atomic_set(&semaphore->refcount, 1);
const VkExportSemaphoreCreateInfo *export =
vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO);
- VkExternalSemaphoreHandleTypeFlags handleTypes =
+ VkExternalSemaphoreHandleTypeFlags handleTypes =
export ? export->handleTypes : 0;
+ VkResult result;
if (handleTypes == 0) {
- /* The DRM execbuffer ioctl always execute in-oder so long as you stay
- * on the same ring. Since we don't expose the blit engine as a DMA
- * queue, a dummy no-op semaphore is a perfectly valid implementation.
- */
- semaphore->permanent.type = ANV_SEMAPHORE_TYPE_DUMMY;
+ if (sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR)
+ result = binary_semaphore_create(device, &semaphore->permanent, false);
+ else
+ result = timeline_semaphore_create(device, &semaphore->permanent, timeline_value);
+ if (result != VK_SUCCESS) {
+ vk_free2(&device->vk.alloc, pAllocator, semaphore);
+ return result;
+ }
} else if (handleTypes & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
assert(handleTypes == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
- if (device->instance->physicalDevice.has_syncobj) {
- semaphore->permanent.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
- semaphore->permanent.syncobj = anv_gem_syncobj_create(device, 0);
- if (!semaphore->permanent.syncobj) {
- vk_free2(&device->alloc, pAllocator, semaphore);
- return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
- }
- } else {
- semaphore->permanent.type = ANV_SEMAPHORE_TYPE_BO;
- VkResult result = anv_device_alloc_bo(device, 4096,
- ANV_BO_ALLOC_EXTERNAL |
- ANV_BO_ALLOC_IMPLICIT_SYNC,
- &semaphore->permanent.bo);
- if (result != VK_SUCCESS) {
- vk_free2(&device->alloc, pAllocator, semaphore);
- return result;
- }
-
- /* If we're going to use this as a fence, we need to *not* have the
- * EXEC_OBJECT_ASYNC bit set.
- */
- assert(!(semaphore->permanent.bo->flags & EXEC_OBJECT_ASYNC));
+ if (sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR)
+ result = binary_semaphore_create(device, &semaphore->permanent, true);
+ else
+ result = timeline_semaphore_create(device, &semaphore->permanent, timeline_value);
+ if (result != VK_SUCCESS) {
+ vk_free2(&device->vk.alloc, pAllocator, semaphore);
+ return result;
}
} else if (handleTypes & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
assert(handleTypes == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT);
- if (device->instance->physicalDevice.has_syncobj) {
+ assert(sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR);
+ if (device->physical->has_syncobj) {
semaphore->permanent.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
semaphore->permanent.syncobj = anv_gem_syncobj_create(device, 0);
+ if (!semaphore->permanent.syncobj) {
+ vk_free2(&device->vk.alloc, pAllocator, semaphore);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
} else {
semaphore->permanent.type = ANV_SEMAPHORE_TYPE_SYNC_FILE;
semaphore->permanent.fd = -1;
}
} else {
assert(!"Unknown handle type");
- vk_free2(&device->alloc, pAllocator, semaphore);
+ vk_free2(&device->vk.alloc, pAllocator, semaphore);
return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
}
break;
case ANV_SEMAPHORE_TYPE_BO:
+ case ANV_SEMAPHORE_TYPE_WSI_BO:
anv_device_release_bo(device, impl->bo);
break;
case ANV_SEMAPHORE_TYPE_SYNC_FILE:
- close(impl->fd);
+ if (impl->fd >= 0)
+ close(impl->fd);
+ break;
+
+ case ANV_SEMAPHORE_TYPE_TIMELINE:
+ anv_timeline_finish(device, &impl->timeline);
break;
case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
+ case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
anv_gem_syncobj_destroy(device, impl->syncobj);
break;
anv_semaphore_impl_cleanup(device, &semaphore->temporary);
anv_semaphore_impl_cleanup(device, &semaphore->permanent);
- vk_free(&device->alloc, semaphore);
+
+ vk_object_base_finish(&semaphore->base);
+ vk_free(&device->vk.alloc, semaphore);
}
void anv_DestroySemaphore(
{
ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
+ VkSemaphoreTypeKHR sem_type =
+ get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL);
+
switch (pExternalSemaphoreInfo->handleType) {
case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
+ /* Timeline semaphores are not exportable, unless we have threaded
+ * submission.
+ */
+ if (sem_type == VK_SEMAPHORE_TYPE_TIMELINE_KHR && !device->has_thread_submit)
+ break;
pExternalSemaphoreProperties->exportFromImportedHandleTypes =
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
pExternalSemaphoreProperties->compatibleHandleTypes =
return;
case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
- if (device->has_exec_fence) {
- pExternalSemaphoreProperties->exportFromImportedHandleTypes =
- VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
- pExternalSemaphoreProperties->compatibleHandleTypes =
- VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
- pExternalSemaphoreProperties->externalSemaphoreFeatures =
- VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
- VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
- return;
- }
- break;
+ if (sem_type == VK_SEMAPHORE_TYPE_TIMELINE_KHR)
+ break;
+ if (!device->has_exec_fence)
+ break;
+ pExternalSemaphoreProperties->exportFromImportedHandleTypes =
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
+ pExternalSemaphoreProperties->compatibleHandleTypes =
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
+ pExternalSemaphoreProperties->externalSemaphoreFeatures =
+ VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
+ VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
+ return;
default:
break;
switch (pImportSemaphoreFdInfo->handleType) {
case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
- if (device->instance->physicalDevice.has_syncobj) {
- new_impl.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
+ if (device->physical->has_syncobj) {
+ /* When importing non temporarily, reuse the semaphore's existing
+ * type. The Linux/DRM implementation allows to interchangeably use
+ * binary & timeline semaphores and we have no way to differentiate
+ * them.
+ */
+ if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT)
+ new_impl.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
+ else
+ new_impl.type = semaphore->permanent.type;
new_impl.syncobj = anv_gem_syncobj_fd_to_handle(device, fd);
if (!new_impl.syncobj)
VkResult result = anv_device_import_bo(device, fd,
ANV_BO_ALLOC_EXTERNAL |
ANV_BO_ALLOC_IMPLICIT_SYNC,
+ 0 /* client_address */,
&new_impl.bo);
if (result != VK_SUCCESS)
return result;
break;
case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
- if (device->instance->physicalDevice.has_syncobj) {
+ if (device->physical->has_syncobj) {
+ uint32_t create_flags = 0;
+
+ if (fd == -1)
+ create_flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
+
new_impl = (struct anv_semaphore_impl) {
.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ,
- .syncobj = anv_gem_syncobj_create(device, 0),
+ .syncobj = anv_gem_syncobj_create(device, create_flags),
};
+
if (!new_impl.syncobj)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
- if (anv_gem_syncobj_import_sync_file(device, new_impl.syncobj, fd)) {
- anv_gem_syncobj_destroy(device, new_impl.syncobj);
- return vk_errorf(device->instance, NULL,
- VK_ERROR_INVALID_EXTERNAL_HANDLE,
- "syncobj sync file import failed: %m");
+
+ if (fd != -1) {
+ if (anv_gem_syncobj_import_sync_file(device, new_impl.syncobj, fd)) {
+ anv_gem_syncobj_destroy(device, new_impl.syncobj);
+ return vk_errorf(device, NULL, VK_ERROR_INVALID_EXTERNAL_HANDLE,
+ "syncobj sync file import failed: %m");
+ }
+ /* Ownership of the FD is transferred to Anv. Since we don't need it
+ * anymore because the associated fence has been put into a syncobj,
+ * we must close the FD.
+ */
+ close(fd);
}
- /* Ownership of the FD is transfered to Anv. Since we don't need it
- * anymore because the associated fence has been put into a syncobj,
- * we must close the FD.
- */
- close(fd);
} else {
new_impl = (struct anv_semaphore_impl) {
.type = ANV_SEMAPHORE_TYPE_SYNC_FILE,
}
case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
- if (pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT)
+ if (pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
+ VkResult result = wait_syncobj_materialize(device, impl->syncobj, pFd);
+ if (result != VK_SUCCESS)
+ return result;
+
fd = anv_gem_syncobj_export_sync_file(device, impl->syncobj);
- else {
+ } else {
assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
fd = anv_gem_syncobj_handle_to_fd(device, impl->syncobj);
}
*pFd = fd;
break;
+ case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
+ assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
+ fd = anv_gem_syncobj_handle_to_fd(device, impl->syncobj);
+ if (fd < 0)
+ return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
+ *pFd = fd;
+ break;
+
default:
return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
}
return VK_SUCCESS;
}
+
+/* Report the current counter value of a timeline semaphore in *pValue.
+ *
+ * The temporary payload takes precedence over the permanent one when it is
+ * set. Only the two timeline payload types are valid here.
+ */
+VkResult anv_GetSemaphoreCounterValue(
+    VkDevice                                    _device,
+    VkSemaphore                                 _semaphore,
+    uint64_t*                                   pValue)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   ANV_FROM_HANDLE(anv_semaphore, semaphore, _semaphore);
+
+   struct anv_semaphore_impl *impl = &semaphore->permanent;
+   if (semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE)
+      impl = &semaphore->temporary;
+
+   if (impl->type == ANV_SEMAPHORE_TYPE_TIMELINE) {
+      /* Garbage-collect the timeline under the device mutex, then read the
+       * highest value recorded as past.
+       */
+      pthread_mutex_lock(&device->mutex);
+      anv_timeline_gc_locked(device, &impl->timeline);
+      *pValue = impl->timeline.highest_past;
+      pthread_mutex_unlock(&device->mutex);
+      return VK_SUCCESS;
+   }
+
+   if (impl->type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE) {
+      /* The kernel owns the value; a failed query marks the device lost. */
+      if (anv_gem_syncobj_timeline_query(device, &impl->syncobj, pValue, 1) != 0)
+         return anv_device_set_lost(device, "unable to query timeline syncobj");
+
+      return VK_SUCCESS;
+   }
+
+   unreachable("Invalid semaphore type");
+}
+
+
+/* Wait until `timeline` has reached `serial` or the deadline expires.
+ *
+ * Must be called with device->mutex held: the mutex is handed to
+ * pthread_cond_timedwait() and is explicitly released and re-acquired
+ * around the BO wait, so it is held again whenever this function returns.
+ *
+ * `abs_timeout_ns` is an absolute time in nanoseconds; it is compared
+ * against anv_gettime_ns() and converted to a timespec for the condition
+ * variable (NOTE(review): assumes both use the same clock - confirm).
+ *
+ * Returns VK_SUCCESS once timeline->highest_past >= serial, VK_TIMEOUT when
+ * the deadline passes first, or whatever error anv_timeline_gc_locked() or
+ * anv_device_wait() reports.
+ */
+static VkResult
+anv_timeline_wait_locked(struct anv_device *device,
+ struct anv_timeline *timeline,
+ uint64_t serial, uint64_t abs_timeout_ns)
+{
+ /* Wait on the queue_submit condition variable until the timeline has a
+ * time point pending that's at least as high as serial.
+ */
+ while (timeline->highest_pending < serial) {
+ struct timespec abstime = {
+ .tv_sec = abs_timeout_ns / NSEC_PER_SEC,
+ .tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
+ };
+
+ UNUSED int ret = pthread_cond_timedwait(&device->queue_submit,
+ &device->mutex, &abstime);
+ assert(ret != EINVAL);
+ /* The return code is not trusted to signal a timeout; the clock and
+ * the timeline state are re-checked instead.
+ */
+ if (anv_gettime_ns() >= abs_timeout_ns &&
+ timeline->highest_pending < serial)
+ return VK_TIMEOUT;
+ }
+
+ /* A point >= serial is now pending; wait for the points ahead of it to
+ * complete, one BO at a time.
+ */
+ while (1) {
+ VkResult result = anv_timeline_gc_locked(device, timeline);
+ if (result != VK_SUCCESS)
+ return result;
+
+ if (timeline->highest_past >= serial)
+ return VK_SUCCESS;
+
+ /* If we got here, our earliest time point has a busy BO */
+ struct anv_timeline_point *point =
+ list_first_entry(&timeline->points,
+ struct anv_timeline_point, link);
+
+ /* Drop the lock while we wait. */
+ /* NOTE(review): the waiting counter presumably keeps the point from
+ * being recycled while the mutex is released - confirm against
+ * anv_timeline_gc_locked().
+ */
+ point->waiting++;
+ pthread_mutex_unlock(&device->mutex);
+
+ result = anv_device_wait(device, point->bo,
+ anv_get_relative_timeout(abs_timeout_ns));
+
+ /* Pick the mutex back up */
+ pthread_mutex_lock(&device->mutex);
+ point->waiting--;
+
+ /* This covers both VK_TIMEOUT and VK_ERROR_DEVICE_LOST */
+ if (result != VK_SUCCESS)
+ return result;
+ }
+}
+
+/* Wait on `n_timelines` timelines, each paired with the value in `serials`
+ * it has to reach, until the absolute deadline `abs_timeout_ns`.
+ *
+ * With `wait_all` (or a single timeline) every timeline is waited on in
+ * turn with the full deadline. Otherwise the timelines are polled with a
+ * zero timeout and the first result other than VK_TIMEOUT ends the wait;
+ * between polling rounds the thread sleeps briefly on the queue_submit
+ * condition variable.
+ *
+ * Takes and releases device->mutex internally. Returns VK_SUCCESS,
+ * VK_TIMEOUT, or the error reported by anv_timeline_wait_locked().
+ */
+static VkResult
+anv_timelines_wait(struct anv_device *device,
+                   struct anv_timeline **timelines,
+                   const uint64_t *serials,
+                   uint32_t n_timelines,
+                   bool wait_all,
+                   uint64_t abs_timeout_ns)
+{
+   if (!wait_all && n_timelines > 1) {
+      pthread_mutex_lock(&device->mutex);
+
+      while (1) {
+         /* n_timelines > 1 here, so the loop below always overwrites this;
+          * the initializer only keeps the variable well defined.
+          */
+         VkResult result = VK_TIMEOUT;
+         for (uint32_t i = 0; i < n_timelines; i++) {
+            result =
+               anv_timeline_wait_locked(device, timelines[i], serials[i], 0);
+            if (result != VK_TIMEOUT)
+               break;
+         }
+
+         if (result != VK_TIMEOUT ||
+             anv_gettime_ns() >= abs_timeout_ns) {
+            pthread_mutex_unlock(&device->mutex);
+            return result;
+         }
+
+         /* If none of them are ready do a short wait so we don't completely
+          * spin while holding the lock. The 10us is completely arbitrary.
+          *
+          * The nap is a tenth of the time left, capped at 10us. The time
+          * left is deadline minus now, clamped at zero because the clock
+          * may have passed the deadline since the check above; subtracting
+          * the other way round wraps in unsigned arithmetic and defeats the
+          * cap.
+          */
+         const uint64_t now_ns = anv_gettime_ns();
+         const uint64_t remaining_ns =
+            abs_timeout_ns > now_ns ? abs_timeout_ns - now_ns : 0;
+         uint64_t abs_short_wait_ns =
+            anv_get_absolute_timeout(MIN2(remaining_ns / 10, 10 * 1000));
+         struct timespec abstime = {
+            .tv_sec = abs_short_wait_ns / NSEC_PER_SEC,
+            .tv_nsec = abs_short_wait_ns % NSEC_PER_SEC,
+         };
+         ASSERTED int ret;
+         ret = pthread_cond_timedwait(&device->queue_submit,
+                                      &device->mutex, &abstime);
+         assert(ret != EINVAL);
+      }
+   } else {
+      VkResult result = VK_SUCCESS;
+      pthread_mutex_lock(&device->mutex);
+      for (uint32_t i = 0; i < n_timelines; i++) {
+         result =
+            anv_timeline_wait_locked(device, timelines[i],
+                                     serials[i], abs_timeout_ns);
+         if (result != VK_SUCCESS)
+            break;
+      }
+      pthread_mutex_unlock(&device->mutex);
+      return result;
+   }
+}
+
+/* Host-side wait on a set of timeline semaphores.
+ *
+ * Entries whose requested value is 0 are dropped from the wait. The
+ * remaining semaphores are waited on through timeline syncobjs when
+ * threaded submission is in use, and through anv_timelines_wait()
+ * otherwise. `timeout` is relative and is turned into an absolute deadline
+ * right before waiting.
+ */
+VkResult anv_WaitSemaphores(
+    VkDevice                                    _device,
+    const VkSemaphoreWaitInfoKHR*               pWaitInfo,
+    uint64_t                                    timeout)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   uint32_t *handles;
+   struct anv_timeline **timelines;
+   uint64_t *values;
+
+   /* One allocation holds the values plus whichever of the two arrays the
+    * submission mode needs; `values` is the pointer that gets freed below.
+    */
+   ANV_MULTIALLOC(ma);
+
+   anv_multialloc_add(&ma, &values, pWaitInfo->semaphoreCount);
+   if (device->has_thread_submit)
+      anv_multialloc_add(&ma, &handles, pWaitInfo->semaphoreCount);
+   else
+      anv_multialloc_add(&ma, &timelines, pWaitInfo->semaphoreCount);
+
+   if (!anv_multialloc_alloc(&ma, &device->vk.alloc,
+                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND))
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Compact the entries that actually need waiting on. */
+   uint32_t wait_count = 0;
+   for (uint32_t s = 0; s < pWaitInfo->semaphoreCount; s++) {
+      ANV_FROM_HANDLE(anv_semaphore, semaphore, pWaitInfo->pSemaphores[s]);
+      const uint64_t wanted = pWaitInfo->pValues[s];
+
+      if (wanted == 0)
+         continue;
+
+      struct anv_semaphore_impl *impl = &semaphore->permanent;
+      if (semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE)
+         impl = &semaphore->temporary;
+
+      if (device->has_thread_submit) {
+         assert(impl->type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE);
+         handles[wait_count] = impl->syncobj;
+      } else {
+         assert(impl->type == ANV_SEMAPHORE_TYPE_TIMELINE);
+         timelines[wait_count] = &impl->timeline;
+      }
+      values[wait_count++] = wanted;
+   }
+
+   const bool wait_all =
+      (pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR) == 0;
+
+   VkResult result = VK_SUCCESS;
+   if (wait_count > 0 && device->has_thread_submit) {
+      int ret =
+         anv_gem_syncobj_timeline_wait(device,
+                                       handles, values, wait_count,
+                                       anv_get_absolute_timeout(timeout),
+                                       wait_all,
+                                       false);
+      if (ret != 0) {
+         /* ETIME is an ordinary timeout; anything else loses the device. */
+         if (errno == ETIME)
+            result = VK_TIMEOUT;
+         else
+            result = anv_device_set_lost(device, "unable to wait on timeline syncobj");
+      }
+   } else if (wait_count > 0) {
+      result = anv_timelines_wait(device, timelines, values, wait_count,
+                                  wait_all,
+                                  anv_get_absolute_timeout(timeout));
+   }
+
+   vk_free(&device->vk.alloc, values);
+
+   return result;
+}
+
+
+/* Host-side signal of a timeline semaphore to pSignalInfo->value.
+ *
+ * The temporary payload takes precedence over the permanent one when it is
+ * set. Only the two timeline payload types are valid here.
+ */
+VkResult anv_SignalSemaphore(
+    VkDevice                                    _device,
+    const VkSemaphoreSignalInfoKHR*             pSignalInfo)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   ANV_FROM_HANDLE(anv_semaphore, semaphore, pSignalInfo->semaphore);
+
+   struct anv_semaphore_impl *impl = &semaphore->permanent;
+   if (semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE)
+      impl = &semaphore->temporary;
+
+   if (impl->type == ANV_SEMAPHORE_TYPE_TIMELINE) {
+      struct anv_timeline *timeline = &impl->timeline;
+
+      pthread_mutex_lock(&device->mutex);
+
+      VkResult result = anv_timeline_gc_locked(device, timeline);
+
+      assert(pSignalInfo->value > timeline->highest_pending);
+
+      /* A host signal completes immediately, so both markers jump to the
+       * new value.
+       */
+      timeline->highest_past = pSignalInfo->value;
+      timeline->highest_pending = pSignalInfo->value;
+
+      /* Deferred submissions are only flushed when the GC pass succeeded. */
+      if (result == VK_SUCCESS)
+         result = anv_device_submit_deferred_locked(device);
+
+      /* Wake every thread sleeping on the queue_submit condition. */
+      pthread_cond_broadcast(&device->queue_submit);
+      pthread_mutex_unlock(&device->mutex);
+      return result;
+   }
+
+   if (impl->type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE) {
+      /* Timeline semaphores are created with a value of 0, so signaling on 0
+       * is a waste of time.
+       */
+      if (pSignalInfo->value == 0)
+         return VK_SUCCESS;
+
+      if (anv_gem_syncobj_timeline_signal(device, &impl->syncobj,
+                                          &pSignalInfo->value, 1) != 0)
+         return anv_device_set_lost(device, "unable to signal timeline syncobj");
+
+      return VK_SUCCESS;
+   }
+
+   unreachable("Invalid semaphore type");
+}