radv: Add timelines with a VK_KHR_timeline_semaphore impl.
authorBas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tue, 22 Oct 2019 08:18:06 +0000 (10:18 +0200)
committerBas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Wed, 30 Oct 2019 10:57:07 +0000 (11:57 +0100)
This does not fully do wait-before-submit, to be done in a follow
up patch.

For kernels without support for timeline syncobjs, this adds an
implementation of non-shareable timelines using legacy syncobjs.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_extensions.py
src/amd/vulkan/radv_private.h

index 5d46c609bdea2bb846a8323b57578ac016567cee..5b27961e50d8498a1c5439714699d4c0b05abc4a 100644 (file)
 #include "util/build_id.h"
 #include "util/debug.h"
 #include "util/mesa-sha1.h"
+#include "util/timespec.h"
 #include "compiler/glsl_types.h"
 #include "util/xmlpool.h"
 
+static struct radv_timeline_point *
+radv_timeline_find_point_at_least_locked(struct radv_device *device,
+                                         struct radv_timeline *timeline,
+                                         uint64_t p);
+
+static struct radv_timeline_point *
+radv_timeline_add_point_locked(struct radv_device *device,
+                               struct radv_timeline *timeline,
+                               uint64_t p);
+
+
 static
 void radv_destroy_semaphore_part(struct radv_device *device,
                                  struct radv_semaphore_part *part);
@@ -2276,7 +2288,26 @@ static VkResult fork_secure_compile_device(struct radv_device *device)
                        }
                }
        }
+       return VK_SUCCESS;
+}
+
+static VkResult
+radv_create_pthread_cond(pthread_cond_t *cond)
+{
+       pthread_condattr_t condattr;
+       if (pthread_condattr_init(&condattr)) {
+               return VK_ERROR_INITIALIZATION_FAILED;
+       }
 
+       if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC)) {
+               pthread_condattr_destroy(&condattr);
+               return VK_ERROR_INITIALIZATION_FAILED;
+       }
+       if (pthread_cond_init(cond, &condattr)) {
+               pthread_condattr_destroy(&condattr);
+               return VK_ERROR_INITIALIZATION_FAILED;
+       }
+       pthread_condattr_destroy(&condattr);
        return VK_SUCCESS;
 }
 
@@ -2479,6 +2510,10 @@ VkResult radv_CreateDevice(
 
        device->mem_cache = radv_pipeline_cache_from_handle(pc);
 
+       result = radv_create_pthread_cond(&device->timeline_cond);
+       if (result != VK_SUCCESS)
+               goto fail_mem_cache;
+
        device->force_aniso =
                MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1));
        if (device->force_aniso >= 0) {
@@ -2497,6 +2532,8 @@ VkResult radv_CreateDevice(
        *pDevice = radv_device_to_handle(device);
        return VK_SUCCESS;
 
+fail_mem_cache:
+       radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
 fail_meta:
        radv_device_finish_meta(device);
 fail:
@@ -2549,6 +2586,7 @@ void radv_DestroyDevice(
 
        radv_destroy_shader_slabs(device);
 
+       pthread_cond_destroy(&device->timeline_cond);
        radv_bo_list_finish(&device->bo_list);
 
        if (radv_device_use_secure_compile(device->instance)) {
@@ -3404,11 +3442,13 @@ fail:
        return vk_error(queue->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
 }
 
-static VkResult radv_alloc_sem_counts(struct radv_instance *instance,
+static VkResult radv_alloc_sem_counts(struct radv_device *device,
                                      struct radv_winsys_sem_counts *counts,
                                      int num_sems,
                                      struct radv_semaphore_part **sems,
-                                     VkFence _fence)
+                                     const uint64_t *timeline_values,
+                                     VkFence _fence,
+                                     bool is_signal)
 {
        int syncobj_idx = 0, sem_idx = 0;
 
@@ -3416,10 +3456,19 @@ static VkResult radv_alloc_sem_counts(struct radv_instance *instance,
                return VK_SUCCESS;
 
        for (uint32_t i = 0; i < num_sems; i++) {
-               if(sems[i]->kind == RADV_SEMAPHORE_SYNCOBJ)
+               switch(sems[i]->kind) {
+               case RADV_SEMAPHORE_SYNCOBJ:
                        counts->syncobj_count++;
-               else
+                       break;
+               case RADV_SEMAPHORE_WINSYS:
                        counts->sem_count++;
+                       break;
+               case RADV_SEMAPHORE_NONE:
+                       break;
+               case RADV_SEMAPHORE_TIMELINE:
+                       counts->syncobj_count++;
+                       break;
+               }
        }
 
        if (_fence != VK_NULL_HANDLE) {
@@ -3431,14 +3480,14 @@ static VkResult radv_alloc_sem_counts(struct radv_instance *instance,
        if (counts->syncobj_count) {
                counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
                if (!counts->syncobj)
-                       return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+                       return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
        }
 
        if (counts->sem_count) {
                counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
                if (!counts->sem) {
                        free(counts->syncobj);
-                       return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+                       return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
                }
        }
 
@@ -3453,6 +3502,26 @@ static VkResult radv_alloc_sem_counts(struct radv_instance *instance,
                case RADV_SEMAPHORE_WINSYS:
                        counts->sem[sem_idx++] = sems[i]->ws_sem;
                        break;
+               case RADV_SEMAPHORE_TIMELINE: {
+                       pthread_mutex_lock(&sems[i]->timeline.mutex);
+                       struct radv_timeline_point *point = NULL;
+                       if (is_signal) {
+                               point = radv_timeline_add_point_locked(device, &sems[i]->timeline, timeline_values[i]);
+                       } else {
+                               point = radv_timeline_find_point_at_least_locked(device, &sems[i]->timeline, timeline_values[i]);
+                       }
+
+                       pthread_mutex_unlock(&sems[i]->timeline.mutex);
+
+                       if (point) {
+                               counts->syncobj[syncobj_idx++] = point->syncobj;
+                       } else {
+                               /* Explicitly remove the semaphore so we might not find
+                                * a point later post-submit. */
+                               sems[i] = NULL;
+                       }
+                       break;
+               }
                }
        }
 
@@ -3464,6 +3533,9 @@ static VkResult radv_alloc_sem_counts(struct radv_instance *instance,
                        counts->syncobj[syncobj_idx++] = fence->syncobj;
        }
 
+       assert(syncobj_idx <= counts->syncobj_count);
+       counts->syncobj_count = syncobj_idx;
+
        return VK_SUCCESS;
 }
 
@@ -3487,21 +3559,23 @@ static void radv_free_temp_syncobjs(struct radv_device *device,
 }
 
 static VkResult
-radv_alloc_sem_info(struct radv_instance *instance,
+radv_alloc_sem_info(struct radv_device *device,
                    struct radv_winsys_sem_info *sem_info,
                    int num_wait_sems,
                    struct radv_semaphore_part **wait_sems,
+                   const uint64_t *wait_values,
                    int num_signal_sems,
                    struct radv_semaphore_part **signal_sems,
+                   const uint64_t *signal_values,
                    VkFence fence)
 {
        VkResult ret;
        memset(sem_info, 0, sizeof(*sem_info));
 
-       ret = radv_alloc_sem_counts(instance, &sem_info->wait, num_wait_sems, wait_sems, VK_NULL_HANDLE);
+       ret = radv_alloc_sem_counts(device, &sem_info->wait, num_wait_sems, wait_sems, wait_values, VK_NULL_HANDLE, false);
        if (ret)
                return ret;
-       ret = radv_alloc_sem_counts(instance, &sem_info->signal, num_signal_sems, signal_sems, fence);
+       ret = radv_alloc_sem_counts(device, &sem_info->signal, num_signal_sems, signal_sems, signal_values, fence, true);
        if (ret)
                radv_free_sem_info(sem_info);
 
@@ -3511,6 +3585,41 @@ radv_alloc_sem_info(struct radv_instance *instance,
        return ret;
 }
 
+static void
+radv_finalize_timelines(struct radv_device *device,
+                        uint32_t num_wait_sems,
+                        struct radv_semaphore_part **wait_sems,
+                        const uint64_t *wait_values,
+                        uint32_t num_signal_sems,
+                        struct radv_semaphore_part **signal_sems,
+                        const uint64_t *signal_values)
+{
+       for (uint32_t i = 0; i < num_wait_sems; ++i) {
+               if (wait_sems[i] && wait_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {
+                       pthread_mutex_lock(&wait_sems[i]->timeline.mutex);
+                       struct radv_timeline_point *point =
+                               radv_timeline_find_point_at_least_locked(device, &wait_sems[i]->timeline, wait_values[i]);
+                       if (point)
+                               --point->wait_count;
+                       pthread_mutex_unlock(&wait_sems[i]->timeline.mutex);
+               }
+       }
+       for (uint32_t i = 0; i < num_signal_sems; ++i) {
+               if (signal_sems[i] && signal_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {
+                       pthread_mutex_lock(&signal_sems[i]->timeline.mutex);
+                       struct radv_timeline_point *point =
+                               radv_timeline_find_point_at_least_locked(device, &signal_sems[i]->timeline, signal_values[i]);
+                       if (point) {
+                               signal_sems[i]->timeline.highest_submitted =
+                                       MAX2(signal_sems[i]->timeline.highest_submitted, point->value);
+                               point->wait_count--;
+                       }
+                       pthread_mutex_unlock(&signal_sems[i]->timeline.mutex);
+               }
+       }
+       pthread_cond_broadcast(&device->timeline_cond);
+}
+
 static void
 radv_sparse_buffer_bind_memory(struct radv_device *device,
                                const VkSparseBufferMemoryBindInfo *bind)
@@ -3606,6 +3715,9 @@ struct radv_deferred_queue_submission {
        uint32_t signal_semaphore_count;
        VkFence fence;
 
+       uint64_t *wait_values;
+       uint64_t *signal_values;
+
        struct radv_semaphore_part *temporary_semaphore_parts;
        uint32_t temporary_semaphore_part_count;
 };
@@ -3627,6 +3739,11 @@ struct radv_queue_submission {
        const VkSemaphore *signal_semaphores;
        uint32_t signal_semaphore_count;
        VkFence fence;
+
+       const uint64_t *wait_values;
+       uint32_t wait_value_count;
+       const uint64_t *signal_values;
+       uint32_t signal_value_count;
 };
 
 static VkResult
@@ -3649,6 +3766,8 @@ radv_create_deferred_submission(struct radv_queue *queue,
        size += submission->image_opaque_bind_count * sizeof(VkSparseImageOpaqueMemoryBindInfo);
        size += submission->wait_semaphore_count * sizeof(struct radv_semaphore_part *);
        size += submission->signal_semaphore_count * sizeof(struct radv_semaphore_part *);
+       size += submission->wait_value_count * sizeof(uint64_t);
+       size += submission->signal_value_count * sizeof(uint64_t);
 
        deferred = calloc(1, size);
        if (!deferred)
@@ -3706,6 +3825,11 @@ radv_create_deferred_submission(struct radv_queue *queue,
                }
        }
 
+       deferred->wait_values = (void*)(deferred->temporary_semaphore_parts + temporary_count);
+       memcpy(deferred->wait_values, submission->wait_values, submission->wait_value_count * sizeof(uint64_t));
+       deferred->signal_values = deferred->wait_values + submission->wait_value_count;
+       memcpy(deferred->signal_values, submission->signal_values, submission->signal_value_count * sizeof(uint64_t));
+
        *out = deferred;
        return VK_SUCCESS;
 }
@@ -3715,7 +3839,6 @@ radv_queue_submit_deferred(struct radv_deferred_queue_submission *submission)
 {
        RADV_FROM_HANDLE(radv_fence, fence, submission->fence);
        struct radv_queue *queue = submission->queue;
-       struct radeon_cmdbuf **cs_array;
        struct radeon_winsys_ctx *ctx = queue->hw_ctx;
        uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT;
        struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
@@ -3737,12 +3860,14 @@ radv_queue_submit_deferred(struct radv_deferred_queue_submission *submission)
        if (result != VK_SUCCESS)
                goto fail;
 
-       result = radv_alloc_sem_info(queue->device->instance,
+       result = radv_alloc_sem_info(queue->device,
                                     &sem_info,
                                     submission->wait_semaphore_count,
                                     submission->wait_semaphores,
+                                    submission->wait_values,
                                     submission->signal_semaphore_count,
                                     submission->signal_semaphores,
+                                    submission->signal_values,
                                     submission->fence);
        if (result != VK_SUCCESS)
                goto fail;
@@ -3767,68 +3892,73 @@ radv_queue_submit_deferred(struct radv_deferred_queue_submission *submission)
                        radv_loge("failed to submit CS\n");
                        abort();
                }
-               radv_free_sem_info(&sem_info);
-               radv_free_temp_syncobjs(queue->device,
-                                       submission->temporary_semaphore_part_count,
-                                       submission->temporary_semaphore_parts);
-               free(submission);
-               return VK_SUCCESS;
-       }
 
-       cs_array = malloc(sizeof(struct radeon_cmdbuf *) *
-                                       (submission->cmd_buffer_count));
+               goto success;
+       } else {
+               struct radeon_cmdbuf **cs_array = malloc(sizeof(struct radeon_cmdbuf *) *
+                                                        (submission->cmd_buffer_count));
 
-       for (uint32_t j = 0; j < submission->cmd_buffer_count; j++) {
-               RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, submission->cmd_buffers[j]);
-               assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+               for (uint32_t j = 0; j < submission->cmd_buffer_count; j++) {
+                       RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, submission->cmd_buffers[j]);
+                       assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
 
-               cs_array[j] = cmd_buffer->cs;
-               if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
-                       can_patch = false;
+                       cs_array[j] = cmd_buffer->cs;
+                       if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
+                               can_patch = false;
 
-               cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
-       }
+                       cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
+               }
 
-       for (uint32_t j = 0; j < submission->cmd_buffer_count; j += advance) {
-               struct radeon_cmdbuf *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
-               const struct radv_winsys_bo_list *bo_list = NULL;
+               for (uint32_t j = 0; j < submission->cmd_buffer_count; j += advance) {
+                       struct radeon_cmdbuf *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
+                       const struct radv_winsys_bo_list *bo_list = NULL;
 
-               advance = MIN2(max_cs_submission,
-                              submission->cmd_buffer_count - j);
+                       advance = MIN2(max_cs_submission,
+                                      submission->cmd_buffer_count - j);
 
-               if (queue->device->trace_bo)
-                       *queue->device->trace_id_ptr = 0;
+                       if (queue->device->trace_bo)
+                               *queue->device->trace_id_ptr = 0;
 
-               sem_info.cs_emit_wait = j == 0;
-               sem_info.cs_emit_signal = j + advance == submission->cmd_buffer_count;
+                       sem_info.cs_emit_wait = j == 0;
+                       sem_info.cs_emit_signal = j + advance == submission->cmd_buffer_count;
 
-               if (unlikely(queue->device->use_global_bo_list)) {
-                       pthread_mutex_lock(&queue->device->bo_list.mutex);
-                       bo_list = &queue->device->bo_list.list;
-               }
+                       if (unlikely(queue->device->use_global_bo_list)) {
+                               pthread_mutex_lock(&queue->device->bo_list.mutex);
+                               bo_list = &queue->device->bo_list.list;
+                       }
 
-               ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
-                                               advance, initial_preamble, continue_preamble_cs,
-                                               &sem_info, bo_list,
-                                               can_patch, base_fence);
+                       ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
+                                                          advance, initial_preamble, continue_preamble_cs,
+                                                          &sem_info, bo_list,
+                                                          can_patch, base_fence);
 
-               if (unlikely(queue->device->use_global_bo_list))
-                       pthread_mutex_unlock(&queue->device->bo_list.mutex);
+                       if (unlikely(queue->device->use_global_bo_list))
+                               pthread_mutex_unlock(&queue->device->bo_list.mutex);
 
-               if (ret) {
-                       radv_loge("failed to submit CS\n");
-                       abort();
-               }
-               if (queue->device->trace_bo) {
-                       radv_check_gpu_hangs(queue, cs_array[j]);
+                       if (ret) {
+                               radv_loge("failed to submit CS\n");
+                               abort();
+                       }
+                       if (queue->device->trace_bo) {
+                               radv_check_gpu_hangs(queue, cs_array[j]);
+                       }
                }
+
+               free(cs_array);
        }
 
+success:
        radv_free_temp_syncobjs(queue->device,
                                submission->temporary_semaphore_part_count,
                                submission->temporary_semaphore_parts);
+       radv_finalize_timelines(queue->device,
+                               submission->wait_semaphore_count,
+                               submission->wait_semaphores,
+                               submission->wait_values,
+                               submission->signal_semaphore_count,
+                               submission->signal_semaphores,
+                               submission->signal_values);
        radv_free_sem_info(&sem_info);
-       free(cs_array);
        free(submission);
        return VK_SUCCESS;
 
@@ -3895,6 +4025,9 @@ VkResult radv_QueueSubmit(
                        wait_dst_stage_mask |= pSubmits[i].pWaitDstStageMask[j];
                }
 
+               const VkTimelineSemaphoreSubmitInfoKHR *timeline_info =
+                       vk_find_struct_const(pSubmits[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR);
+
                result = radv_queue_submit(queue, &(struct radv_queue_submission) {
                                .cmd_buffers = pSubmits[i].pCommandBuffers,
                                .cmd_buffer_count = pSubmits[i].commandBufferCount,
@@ -3904,7 +4037,11 @@ VkResult radv_QueueSubmit(
                                .wait_semaphore_count = pSubmits[i].waitSemaphoreCount,
                                .signal_semaphores = pSubmits[i].pSignalSemaphores,
                                .signal_semaphore_count = pSubmits[i].signalSemaphoreCount,
-                               .fence = i == fence_idx ? fence : VK_NULL_HANDLE
+                               .fence = i == fence_idx ? fence : VK_NULL_HANDLE,
+                               .wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,
+                               .wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues ? timeline_info->waitSemaphoreValueCount : 0,
+                               .signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,
+                               .signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues ? timeline_info->signalSemaphoreValueCount : 0,
                        });
                if (result != VK_SUCCESS)
                        return result;
@@ -4522,6 +4659,9 @@ static bool radv_sparse_bind_has_effects(const VkBindSparseInfo *info)
                if (i != fence_idx && !radv_sparse_bind_has_effects(pBindInfo + i))
                        continue;
 
+               const VkTimelineSemaphoreSubmitInfoKHR *timeline_info =
+                       vk_find_struct_const(pBindInfo[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR);
+
                VkResult result = radv_queue_submit(queue, &(struct radv_queue_submission) {
                                .buffer_binds = pBindInfo[i].pBufferBinds,
                                .buffer_bind_count = pBindInfo[i].bufferBindCount,
@@ -4532,6 +4672,10 @@ static bool radv_sparse_bind_has_effects(const VkBindSparseInfo *info)
                                .signal_semaphores = pBindInfo[i].pSignalSemaphores,
                                .signal_semaphore_count = pBindInfo[i].signalSemaphoreCount,
                                .fence = i == fence_idx ? fence : VK_NULL_HANDLE,
+                               .wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,
+                               .wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues ? timeline_info->waitSemaphoreValueCount : 0,
+                               .signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,
+                               .signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues ? timeline_info->signalSemaphoreValueCount : 0,
                        });
 
                if (result != VK_SUCCESS)
@@ -4820,6 +4964,148 @@ VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
 
 // Queue semaphore functions
 
+static void
+radv_create_timeline(struct radv_timeline *timeline, uint64_t value)
+{
+       timeline->highest_signaled = value;
+       timeline->highest_submitted = value;
+       list_inithead(&timeline->points);
+       list_inithead(&timeline->free_points);
+       pthread_mutex_init(&timeline->mutex, NULL);
+}
+
+static void
+radv_destroy_timeline(struct radv_device *device,
+                      struct radv_timeline *timeline)
+{
+       list_for_each_entry_safe(struct radv_timeline_point, point,
+                                &timeline->free_points, list) {
+               list_del(&point->list);
+               device->ws->destroy_syncobj(device->ws, point->syncobj);
+               free(point);
+       }
+       list_for_each_entry_safe(struct radv_timeline_point, point,
+                                &timeline->points, list) {
+               list_del(&point->list);
+               device->ws->destroy_syncobj(device->ws, point->syncobj);
+               free(point);
+       }
+       pthread_mutex_destroy(&timeline->mutex);
+}
+
+static void
+radv_timeline_gc_locked(struct radv_device *device,
+                        struct radv_timeline *timeline)
+{
+       list_for_each_entry_safe(struct radv_timeline_point, point,
+                                &timeline->points, list) {
+               if (point->wait_count || point->value > timeline->highest_submitted)
+                       return;
+
+               if (device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, 0)) {
+                       timeline->highest_signaled = point->value;
+                       list_del(&point->list);
+                       list_add(&point->list, &timeline->free_points);
+               }
+       }
+}
+
+static struct radv_timeline_point *
+radv_timeline_find_point_at_least_locked(struct radv_device *device,
+                                         struct radv_timeline *timeline,
+                                         uint64_t p)
+{
+       radv_timeline_gc_locked(device, timeline);
+
+       if (p <= timeline->highest_signaled)
+               return NULL;
+
+       list_for_each_entry(struct radv_timeline_point, point,
+                           &timeline->points, list) {
+               if (point->value >= p) {
+                       ++point->wait_count;
+                       return point;
+               }
+       }
+       return NULL;
+}
+
+static struct radv_timeline_point *
+radv_timeline_add_point_locked(struct radv_device *device,
+                               struct radv_timeline *timeline,
+                               uint64_t p)
+{
+       radv_timeline_gc_locked(device, timeline);
+
+       struct radv_timeline_point *ret = NULL;
+       struct radv_timeline_point *prev = NULL;
+
+       if (p <= timeline->highest_signaled)
+               return NULL;
+
+       list_for_each_entry(struct radv_timeline_point, point,
+                           &timeline->points, list) {
+               if (point->value == p) {
+                       return NULL;
+               }
+
+               if (point->value < p)
+                       prev = point;
+       }
+
+       if (list_is_empty(&timeline->free_points)) {
+               ret = malloc(sizeof(struct radv_timeline_point));
+               device->ws->create_syncobj(device->ws, &ret->syncobj);
+       } else {
+               ret = list_first_entry(&timeline->free_points, struct radv_timeline_point, list);
+               list_del(&ret->list);
+
+               device->ws->reset_syncobj(device->ws, ret->syncobj);
+       }
+
+       ret->value = p;
+       ret->wait_count = 1;
+
+       if (prev) {
+               list_add(&ret->list, &prev->list);
+       } else {
+               list_addtail(&ret->list, &timeline->points);
+       }
+       return ret;
+}
+
+
+static VkResult
+radv_timeline_wait_locked(struct radv_device *device,
+                          struct radv_timeline *timeline,
+                          uint64_t value,
+                          uint64_t abs_timeout)
+{
+       while(timeline->highest_submitted < value) {
+               struct timespec abstime;
+               timespec_from_nsec(&abstime, abs_timeout);
+
+               pthread_cond_timedwait(&device->timeline_cond, &timeline->mutex, &abstime);
+
+               if (radv_get_current_time() >= abs_timeout && timeline->highest_submitted < value)
+                       return VK_TIMEOUT;
+       }
+
+       struct radv_timeline_point *point = radv_timeline_find_point_at_least_locked(device, timeline, value);
+       if (!point)
+               return VK_SUCCESS;
+
+       point->wait_count++;
+
+       pthread_mutex_unlock(&timeline->mutex);
+
+       bool success = device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, abs_timeout);
+
+       pthread_mutex_lock(&timeline->mutex);
+       point->wait_count--;
+       return success ? VK_SUCCESS : VK_TIMEOUT;
+}
+
 static
 void radv_destroy_semaphore_part(struct radv_device *device,
                                  struct radv_semaphore_part *part)
@@ -4830,6 +5116,9 @@ void radv_destroy_semaphore_part(struct radv_device *device,
        case RADV_SEMAPHORE_WINSYS:
                device->ws->destroy_sem(part->ws_sem);
                break;
+       case RADV_SEMAPHORE_TIMELINE:
+               radv_destroy_timeline(device, &part->timeline);
+               break;
        case RADV_SEMAPHORE_SYNCOBJ:
                device->ws->destroy_syncobj(device->ws, part->syncobj);
                break;
@@ -4837,6 +5126,20 @@ void radv_destroy_semaphore_part(struct radv_device *device,
        part->kind = RADV_SEMAPHORE_NONE;
 }
 
+static VkSemaphoreTypeKHR
+radv_get_semaphore_type(const void *pNext, uint64_t *initial_value)
+{
+       const VkSemaphoreTypeCreateInfoKHR *type_info =
+               vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO_KHR);
+
+       if (!type_info)
+               return VK_SEMAPHORE_TYPE_BINARY_KHR;
+
+       if (initial_value)
+               *initial_value = type_info->initialValue;
+       return type_info->semaphoreType;
+}
+
 VkResult radv_CreateSemaphore(
        VkDevice                                    _device,
        const VkSemaphoreCreateInfo*                pCreateInfo,
@@ -4848,6 +5151,8 @@ VkResult radv_CreateSemaphore(
                vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO);
        VkExternalSemaphoreHandleTypeFlags handleTypes =
                export ? export->handleTypes : 0;
+       uint64_t initial_value = 0;
+       VkSemaphoreTypeKHR type = radv_get_semaphore_type(pCreateInfo->pNext, &initial_value);
 
        struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator,
                                               sizeof(*sem), 8,
@@ -4858,8 +5163,10 @@ VkResult radv_CreateSemaphore(
        sem->temporary.kind = RADV_SEMAPHORE_NONE;
        sem->permanent.kind = RADV_SEMAPHORE_NONE;
 
-       /* create a syncobject if we are going to export this semaphore */
-       if (device->always_use_syncobj || handleTypes) {
+       if (type == VK_SEMAPHORE_TYPE_TIMELINE_KHR) {
+               radv_create_timeline(&sem->permanent.timeline, initial_value);
+               sem->permanent.kind = RADV_SEMAPHORE_TIMELINE;
+       } else if (device->always_use_syncobj || handleTypes) {
                assert (device->physical_device->rad_info.has_syncobj);
                int ret = device->ws->create_syncobj(device->ws, &sem->permanent.syncobj);
                if (ret) {
@@ -4895,6 +5202,105 @@ void radv_DestroySemaphore(
        vk_free2(&device->alloc, pAllocator, sem);
 }
 
+VkResult
+radv_GetSemaphoreCounterValueKHR(VkDevice _device,
+                                 VkSemaphore _semaphore,
+                                 uint64_t* pValue)
+{
+       RADV_FROM_HANDLE(radv_device, device, _device);
+       RADV_FROM_HANDLE(radv_semaphore, semaphore, _semaphore);
+
+       struct radv_semaphore_part *part =
+               semaphore->temporary.kind != RADV_SEMAPHORE_NONE ? &semaphore->temporary : &semaphore->permanent;
+
+       switch (part->kind) {
+       case RADV_SEMAPHORE_TIMELINE: {
+               pthread_mutex_lock(&part->timeline.mutex);
+               radv_timeline_gc_locked(device, &part->timeline);
+               *pValue = part->timeline.highest_signaled;
+               pthread_mutex_unlock(&part->timeline.mutex);
+               return VK_SUCCESS;
+       }
+       case RADV_SEMAPHORE_NONE:
+       case RADV_SEMAPHORE_SYNCOBJ:
+       case RADV_SEMAPHORE_WINSYS:
+               unreachable("Invalid semaphore type");
+       }
+       unreachable("Unhandled semaphore type");
+}
+
+
+static VkResult
+radv_wait_timelines(struct radv_device *device,
+                    const VkSemaphoreWaitInfoKHR* pWaitInfo,
+                    uint64_t abs_timeout)
+{
+       if ((pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR) && pWaitInfo->semaphoreCount > 1) {
+               for (;;) {
+                       for(uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
+                               RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
+                               pthread_mutex_lock(&semaphore->permanent.timeline.mutex);
+                               VkResult result = radv_timeline_wait_locked(device, &semaphore->permanent.timeline, pWaitInfo->pValues[i], 0);
+                               pthread_mutex_unlock(&semaphore->permanent.timeline.mutex);
+
+                               if (result == VK_SUCCESS)
+                                       return VK_SUCCESS;
+                       }
+                       if (radv_get_current_time() > abs_timeout)
+                               return VK_TIMEOUT;
+               }
+       }
+
+       for(uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
+               RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
+               pthread_mutex_lock(&semaphore->permanent.timeline.mutex);
+               VkResult result = radv_timeline_wait_locked(device, &semaphore->permanent.timeline, pWaitInfo->pValues[i], abs_timeout);
+               pthread_mutex_unlock(&semaphore->permanent.timeline.mutex);
+
+               if (result != VK_SUCCESS)
+                       return result;
+       }
+       return VK_SUCCESS;
+}
+VkResult
+radv_WaitSemaphoresKHR(VkDevice _device,
+                       const VkSemaphoreWaitInfoKHR* pWaitInfo,
+                       uint64_t timeout)
+{
+       RADV_FROM_HANDLE(radv_device, device, _device);
+       uint64_t abs_timeout = radv_get_absolute_timeout(timeout);
+       return radv_wait_timelines(device, pWaitInfo, abs_timeout);
+}
+
+VkResult
+radv_SignalSemaphoreKHR(VkDevice _device,
+                        const VkSemaphoreSignalInfoKHR* pSignalInfo)
+{
+       RADV_FROM_HANDLE(radv_device, device, _device);
+       RADV_FROM_HANDLE(radv_semaphore, semaphore, pSignalInfo->semaphore);
+
+       struct radv_semaphore_part *part =
+               semaphore->temporary.kind != RADV_SEMAPHORE_NONE ? &semaphore->temporary : &semaphore->permanent;
+
+       switch(part->kind) {
+       case RADV_SEMAPHORE_TIMELINE: {
+               pthread_mutex_lock(&part->timeline.mutex);
+               radv_timeline_gc_locked(device, &part->timeline);
+               part->timeline.highest_submitted = MAX2(part->timeline.highest_submitted, pSignalInfo->value);
+               part->timeline.highest_signaled = MAX2(part->timeline.highest_signaled, pSignalInfo->value);
+               pthread_mutex_unlock(&part->timeline.mutex);
+               break;
+       }
+       case RADV_SEMAPHORE_NONE:
+       case RADV_SEMAPHORE_SYNCOBJ:
+       case RADV_SEMAPHORE_WINSYS:
+               unreachable("Invalid semaphore type");
+       }
+       return VK_SUCCESS;
+}
+
+
+
 VkResult radv_CreateEvent(
        VkDevice                                    _device,
        const VkEventCreateInfo*                    pCreateInfo,
@@ -6065,11 +6471,17 @@ void radv_GetPhysicalDeviceExternalSemaphoreProperties(
        VkExternalSemaphoreProperties               *pExternalSemaphoreProperties)
 {
        RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
+       VkSemaphoreTypeKHR type = radv_get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL);
+       
+       if (type == VK_SEMAPHORE_TYPE_TIMELINE_KHR) {
+               pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
+               pExternalSemaphoreProperties->compatibleHandleTypes = 0;
+               pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
 
        /* Require has_syncobj_wait_for_submit for the syncobj signal ioctl introduced at virtually the same time */
-       if (pdevice->rad_info.has_syncobj_wait_for_submit &&
-           (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT || 
-            pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT)) {
+       } else if (pdevice->rad_info.has_syncobj_wait_for_submit &&
+                  (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT || 
+                   pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT)) {
                pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
                pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
                pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
index a9e499df91bbef47d16f007219b9165444d1a09d..9f437f2253feb80365c48bd782650a466940f8df 100644 (file)
@@ -96,6 +96,7 @@ EXTENSIONS = [
     Extension('VK_KHR_surface',                          25, 'RADV_HAS_SURFACE'),
     Extension('VK_KHR_surface_protected_capabilities',    1, 'RADV_HAS_SURFACE'),
     Extension('VK_KHR_swapchain',                        68, 'RADV_HAS_SURFACE'),
+    Extension('VK_KHR_timeline_semaphore',                2, False),
     Extension('VK_KHR_uniform_buffer_standard_layout',    1, True),
     Extension('VK_KHR_variable_pointers',                 1, True),
     Extension('VK_KHR_wayland_surface',                   6, 'VK_USE_PLATFORM_WAYLAND_KHR'),
index ef7ba34c85b7f433b7f7d4dfbbcdf35e00fb3206..6eb4e5fbe3f458d630d07f71559d67114f9751c1 100644 (file)
@@ -820,6 +820,10 @@ struct radv_device {
        int force_aniso;
 
        struct radv_secure_compile_state *sc_state;
+
+       /* Condition variable for legacy timelines, to notify waiters when a
+        * new point gets submitted. */
+       pthread_cond_t timeline_cond;
 };
 
 struct radv_device_memory {
@@ -2168,13 +2172,40 @@ typedef enum {
        RADV_SEMAPHORE_NONE,
        RADV_SEMAPHORE_WINSYS,
        RADV_SEMAPHORE_SYNCOBJ,
+       RADV_SEMAPHORE_TIMELINE,
 } radv_semaphore_kind;
 
+struct radv_timeline_point {
+       struct list_head list;
+
+       uint64_t value;
+       uint32_t syncobj;
+
+       /* Separate from the list to accomodate CPU wait being async, as well
+        * as prevent point deletion during submission. */
+       unsigned wait_count;
+};
+
+struct radv_timeline {
+       /* Using a pthread mutex to be compatible with condition variables. */
+       pthread_mutex_t mutex;
+
+       uint64_t highest_signaled;
+       uint64_t highest_submitted;
+
+       struct list_head points;
+
+       /* Keep free points on hand so we do not have to recreate syncobjs all
+        * the time. */
+       struct list_head free_points;
+};
+
 struct radv_semaphore_part {
        radv_semaphore_kind kind;
        union {
                uint32_t syncobj;
                struct radeon_winsys_sem *ws_sem;
+               struct radv_timeline timeline;
        };
 };