From aba8c579a9780f9d3ad2d677f9ef625df7726479 Mon Sep 17 00:00:00 2001
From: Bas Nieuwenhuizen
Date: Sun, 17 Nov 2019 06:23:15 +0100
Subject: [PATCH] turnip: semaphore support.

There is only one queue for now, so for non-shared semaphores, the
implementation is basically a no-op. For shared semaphores, this always
uses syncobjs. This depends on syncobj support in the msm kernel
driver.
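
For review context (not part of the change): on the API side, sharing
one of these semaphores across processes would look roughly like the
sketch below. device, other_device and other_sem are placeholder
handles, and error handling is omitted.

   /* Process A: create an exportable semaphore and hand out an opaque fd. */
   VkExportSemaphoreCreateInfo export_info = {
      .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
      .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
   };
   VkSemaphoreCreateInfo create_info = {
      .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
      .pNext = &export_info,
   };
   VkSemaphore sem;
   vkCreateSemaphore(device, &create_info, NULL, &sem);

   VkSemaphoreGetFdInfoKHR get_info = {
      .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
      .semaphore = sem,
      .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
   };
   int fd;
   vkGetSemaphoreFdKHR(device, &get_info, &fd);

   /* Process B: adopt the fd as the permanent payload of an existing
    * semaphore; ownership of the fd transfers on success.
    */
   VkImportSemaphoreFdInfoKHR import_info = {
      .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
      .semaphore = other_sem,
      .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
      .fd = fd,
   };
   vkImportSemaphoreFdKHR(other_device, &import_info);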

Part-of:
---
 include/drm-uapi/msm_drm.h            |  24 +-
 src/freedreno/vulkan/tu_device.c      | 308 ++++++++++++++++++++++----
 src/freedreno/vulkan/tu_extensions.py |   4 +-
 src/freedreno/vulkan/tu_private.h     |  21 +-
 4 files changed, 312 insertions(+), 45 deletions(-)

diff --git a/include/drm-uapi/msm_drm.h b/include/drm-uapi/msm_drm.h
index 0b85ed6a371..19806eb3a8e 100644
--- a/include/drm-uapi/msm_drm.h
+++ b/include/drm-uapi/msm_drm.h
@@ -217,13 +217,28 @@ struct drm_msm_gem_submit_bo {
 #define MSM_SUBMIT_FENCE_FD_IN   0x40000000 /* enable input fence_fd */
 #define MSM_SUBMIT_FENCE_FD_OUT  0x20000000 /* enable output fence_fd */
 #define MSM_SUBMIT_SUDO          0x10000000 /* run submitted cmds from RB */
+#define MSM_SUBMIT_SYNCOBJ_IN    0x08000000 /* enable input syncobj */
+#define MSM_SUBMIT_SYNCOBJ_OUT   0x04000000 /* enable output syncobj */
 #define MSM_SUBMIT_FLAGS                ( \
                MSM_SUBMIT_NO_IMPLICIT   | \
                MSM_SUBMIT_FENCE_FD_IN   | \
                MSM_SUBMIT_FENCE_FD_OUT  | \
                MSM_SUBMIT_SUDO          | \
+               MSM_SUBMIT_SYNCOBJ_IN    | \
+               MSM_SUBMIT_SYNCOBJ_OUT   | \
                0)
 
+#define MSM_SUBMIT_SYNCOBJ_RESET 0x00000001 /* Reset syncobj after wait. */
+#define MSM_SUBMIT_SYNCOBJ_FLAGS        ( \
+               MSM_SUBMIT_SYNCOBJ_RESET | \
+               0)
+
+struct drm_msm_gem_submit_syncobj {
+       __u32 handle;     /* in, syncobj handle. */
+       __u32 flags;      /* in, from MSM_SUBMIT_SYNCOBJ_FLAGS */
+       __u64 point;      /* in, timepoint for timeline syncobjs. */
+};
+
 /* Each cmdstream submit consists of a table of buffers involved, and
  * one or more cmdstream buffers.  This allows for conditional execution
  * (context-restore), and IB buffers needed for per tile/bin draw cmds.
@@ -236,7 +251,14 @@ struct drm_msm_gem_submit {
        __u64 bos;            /* in, ptr to array of submit_bo's */
        __u64 cmds;           /* in, ptr to array of submit_cmd's */
        __s32 fence_fd;       /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN/OUT) */
-       __u32 queueid;        /* in, submitqueue id */
+       __u32 queueid;         /* in, submitqueue id */
+       __u64 in_syncobjs;     /* in, ptr to array of drm_msm_gem_submit_syncobj */
+       __u64 out_syncobjs;    /* in, ptr to array of drm_msm_gem_submit_syncobj */
+       __u32 nr_in_syncobjs;  /* in, number of entries in in_syncobjs */
+       __u32 nr_out_syncobjs; /* in, number of entries in out_syncobjs */
+       __u32 syncobj_stride;  /* in, stride of syncobj arrays. */
+       __u32 pad;             /* in, reserved for future use, always 0. */
 };
 
 /* The normal way to synchronize with the GPU is just to CPU_PREP on
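
Review note: userspace consumes the new uapi roughly as below — a
minimal sketch against the definitions above, not turnip's actual
submit path. fd, queue_id, wait_syncobj and signal_syncobj are assumed
to exist, and buffer/cmdstream setup is omitted. syncobj_stride lets
the struct grow in later kernels without breaking older userspace.

   struct drm_msm_gem_submit_syncobj in = {
      .handle = wait_syncobj,             /* wait for this syncobj... */
      .flags  = MSM_SUBMIT_SYNCOBJ_RESET, /* ...and unsignal it afterwards */
   };
   struct drm_msm_gem_submit_syncobj out = {
      .handle = signal_syncobj,           /* signaled when the submit completes */
   };
   struct drm_msm_gem_submit req = {
      .flags           = MSM_PIPE_3D0 | MSM_SUBMIT_SYNCOBJ_IN |
                         MSM_SUBMIT_SYNCOBJ_OUT,
      .queueid         = queue_id,
      .in_syncobjs     = (uint64_t)(uintptr_t)&in,
      .out_syncobjs    = (uint64_t)(uintptr_t)&out,
      .nr_in_syncobjs  = 1,
      .nr_out_syncobjs = 1,
      .syncobj_stride  = sizeof(struct drm_msm_gem_submit_syncobj),
   };
   drmCommandWriteRead(fd, DRM_MSM_GEM_SUBMIT, &req, sizeof(req));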

diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c
index 80c42902a8e..43ed5f6e4a8 100644
--- a/src/freedreno/vulkan/tu_device.c
+++ b/src/freedreno/vulkan/tu_device.c
@@ -48,6 +48,10 @@
 /* for fd_get_driver/device_uuid() */
 #include "freedreno/common/freedreno_uuid.h"
 
+static void
+tu_semaphore_remove_temp(struct tu_device *device,
+                         struct tu_semaphore *sem);
+
 static int
 tu_device_get_cache_uuid(uint16_t family, void *uuid)
 {
@@ -206,6 +210,9 @@ tu_physical_device_init(struct tu_physical_device *device,
       return result;
    }
 
+   device->msm_major_version = version->version_major;
+   device->msm_minor_version = version->version_minor;
+
    drmFreeVersion(version);
 
    if (instance->debug_flags & TU_DEBUG_STARTUP)
@@ -1456,6 +1463,65 @@ tu_GetDeviceQueue(VkDevice _device,
    tu_GetDeviceQueue2(_device, &info, pQueue);
 }
 
+static VkResult
+tu_get_semaphore_syncobjs(const VkSemaphore *sems,
+                          uint32_t sem_count,
+                          bool wait,
+                          struct drm_msm_gem_submit_syncobj **out,
+                          uint32_t *out_count)
+{
+   uint32_t syncobj_count = 0;
+   struct drm_msm_gem_submit_syncobj *syncobjs;
+
+   for (uint32_t i = 0; i < sem_count; ++i) {
+      TU_FROM_HANDLE(tu_semaphore, sem, sems[i]);
+
+      struct tu_semaphore_part *part =
+         sem->temporary.kind != TU_SEMAPHORE_NONE ?
+            &sem->temporary : &sem->permanent;
+
+      if (part->kind == TU_SEMAPHORE_SYNCOBJ)
+         ++syncobj_count;
+   }
+
+   *out = NULL;
+   *out_count = syncobj_count;
+   if (!syncobj_count)
+      return VK_SUCCESS;
+
+   *out = syncobjs = calloc(syncobj_count, sizeof (*syncobjs));
+   if (!syncobjs)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   for (uint32_t i = 0, j = 0; i < sem_count; ++i) {
+      TU_FROM_HANDLE(tu_semaphore, sem, sems[i]);
+
+      struct tu_semaphore_part *part =
+         sem->temporary.kind != TU_SEMAPHORE_NONE ?
+            &sem->temporary : &sem->permanent;
+
+      if (part->kind == TU_SEMAPHORE_SYNCOBJ) {
+         syncobjs[j].handle = part->syncobj;
+         syncobjs[j].flags = wait ? MSM_SUBMIT_SYNCOBJ_RESET : 0;
+         ++j;
+      }
+   }
+
+   return VK_SUCCESS;
+}
+
+
+static void
+tu_semaphores_remove_temp(struct tu_device *device,
+                          const VkSemaphore *sems,
+                          uint32_t sem_count)
+{
+   for (uint32_t i = 0; i < sem_count; ++i) {
+      TU_FROM_HANDLE(tu_semaphore, sem, sems[i]);
+      tu_semaphore_remove_temp(device, sem);
+   }
+}
+
 VkResult
 tu_QueueSubmit(VkQueue _queue,
                uint32_t submitCount,
@@ -1463,13 +1529,34 @@ tu_QueueSubmit(VkQueue _queue,
                VkFence _fence)
 {
    TU_FROM_HANDLE(tu_queue, queue, _queue);
+   VkResult result;
 
    for (uint32_t i = 0; i < submitCount; ++i) {
       const VkSubmitInfo *submit = pSubmits + i;
       const bool last_submit = (i == submitCount - 1);
+      struct drm_msm_gem_submit_syncobj *in_syncobjs = NULL, *out_syncobjs = NULL;
+      uint32_t nr_in_syncobjs, nr_out_syncobjs;
       struct tu_bo_list bo_list;
       tu_bo_list_init(&bo_list);
 
+      result = tu_get_semaphore_syncobjs(pSubmits[i].pWaitSemaphores,
+                                         pSubmits[i].waitSemaphoreCount,
+                                         true, &in_syncobjs, &nr_in_syncobjs);
+      if (result != VK_SUCCESS) {
+         /* TODO: emit VK_ERROR_DEVICE_LOST */
+         fprintf(stderr, "failed to allocate space for semaphore submission\n");
+         abort();
+      }
+
+      result = tu_get_semaphore_syncobjs(pSubmits[i].pSignalSemaphores,
+                                         pSubmits[i].signalSemaphoreCount,
+                                         false, &out_syncobjs, &nr_out_syncobjs);
+      if (result != VK_SUCCESS) {
+         /* TODO: emit VK_ERROR_DEVICE_LOST */
+         fprintf(stderr, "failed to allocate space for semaphore submission\n");
+         abort();
+      }
+
       uint32_t entry_count = 0;
       for (uint32_t j = 0; j < submit->commandBufferCount; ++j) {
         TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, submit->pCommandBuffers[j]);
@@ -1497,6 +1584,13 @@ tu_QueueSubmit(VkQueue _queue,
       }
 
       uint32_t flags = MSM_PIPE_3D0;
+      if (nr_in_syncobjs) {
+         flags |= MSM_SUBMIT_SYNCOBJ_IN;
+      }
+      if (nr_out_syncobjs) {
+         flags |= MSM_SUBMIT_SYNCOBJ_OUT;
+      }
+
       if (last_submit) {
          flags |= MSM_SUBMIT_FENCE_FD_OUT;
       }
@@ -1508,6 +1602,11 @@
          .nr_bos = bo_list.count,
          .cmds = (uint64_t)(uintptr_t)cmds,
          .nr_cmds = entry_count,
+         .in_syncobjs = (uint64_t)(uintptr_t)in_syncobjs,
+         .out_syncobjs = (uint64_t)(uintptr_t)out_syncobjs,
+         .nr_in_syncobjs = nr_in_syncobjs,
+         .nr_out_syncobjs = nr_out_syncobjs,
+         .syncobj_stride = sizeof(struct drm_msm_gem_submit_syncobj),
       };
 
       int ret = drmCommandWriteRead(queue->device->physical_device->local_fd,
@@ -1519,10 +1618,16 @@ tu_QueueSubmit(VkQueue _queue,
       }
 
       tu_bo_list_destroy(&bo_list);
+      free(in_syncobjs);
+      free(out_syncobjs);
+
+      tu_semaphores_remove_temp(queue->device, pSubmits[i].pWaitSemaphores,
+                                pSubmits[i].waitSemaphoreCount);
+
       if (last_submit) {
          /* no need to merge fences as queue execution is serialized */
         tu_fence_update_fd(&queue->submit_fence, req.fence_fd);
       }
    }
 
@@ -1557,44 +1662,6 @@ tu_DeviceWaitIdle(VkDevice _device)
    return VK_SUCCESS;
 }
 
-VkResult
-tu_ImportSemaphoreFdKHR(VkDevice _device,
-                        const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
-{
-   tu_stub();
-
-   return VK_SUCCESS;
-}
-
-VkResult
-tu_GetSemaphoreFdKHR(VkDevice _device,
-                     const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
-                     int *pFd)
-{
-   tu_stub();
-
-   return VK_SUCCESS;
-}
-
-VkResult
-tu_ImportFenceFdKHR(VkDevice _device,
-                    const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
-{
-   tu_stub();
-
-   return VK_SUCCESS;
-}
-
-VkResult
-tu_GetFenceFdKHR(VkDevice _device,
-                 const VkFenceGetFdInfoKHR *pGetFdInfo,
-                 int *pFd)
-{
-   tu_stub();
-
-   return VK_SUCCESS;
-}
-
 VkResult
 tu_EnumerateInstanceExtensionProperties(const char *pLayerName,
                                         uint32_t *pPropertyCount,
@@ -1974,6 +2041,30 @@ tu_QueueBindSparse(VkQueue _queue,
 
 // Queue semaphore functions
 
+
+static void
+tu_semaphore_part_destroy(struct tu_device *device,
+                          struct tu_semaphore_part *part)
+{
+   switch (part->kind) {
+   case TU_SEMAPHORE_NONE:
+      break;
+   case TU_SEMAPHORE_SYNCOBJ:
+      drmSyncobjDestroy(device->physical_device->local_fd, part->syncobj);
+      break;
+   }
+   part->kind = TU_SEMAPHORE_NONE;
+}
+
+static void
+tu_semaphore_remove_temp(struct tu_device *device,
+                         struct tu_semaphore *sem)
+{
+   if (sem->temporary.kind != TU_SEMAPHORE_NONE) {
+      tu_semaphore_part_destroy(device, &sem->temporary);
+   }
+}
+
 VkResult
 tu_CreateSemaphore(VkDevice _device,
                    const VkSemaphoreCreateInfo *pCreateInfo,
@@ -1988,6 +2079,21 @@ tu_CreateSemaphore(VkDevice _device,
    if (!sem)
       return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
 
+   const VkExportSemaphoreCreateInfo *export =
+      vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO);
+   VkExternalSemaphoreHandleTypeFlags handleTypes =
+      export ? export->handleTypes : 0;
+
+   sem->permanent.kind = TU_SEMAPHORE_NONE;
+   sem->temporary.kind = TU_SEMAPHORE_NONE;
+
+   if (handleTypes) {
+      if (drmSyncobjCreate(device->physical_device->local_fd, 0, &sem->permanent.syncobj) < 0) {
+         vk_free2(&device->alloc, pAllocator, sem);
+         return VK_ERROR_OUT_OF_HOST_MEMORY;
+      }
+      sem->permanent.kind = TU_SEMAPHORE_SYNCOBJ;
+   }
    *pSemaphore = tu_semaphore_to_handle(sem);
    return VK_SUCCESS;
 }
@@ -2002,6 +2108,9 @@ tu_DestroySemaphore(VkDevice _device,
    if (!_semaphore)
       return;
 
+   tu_semaphore_part_destroy(device, &sem->permanent);
+   tu_semaphore_part_destroy(device, &sem->temporary);
+
    vk_free2(&device->alloc, pAllocator, sem);
 }
 
@@ -2339,15 +2448,132 @@ tu_GetMemoryFdPropertiesKHR(VkDevice _device,
    return VK_SUCCESS;
 }
 
+VkResult
+tu_ImportSemaphoreFdKHR(VkDevice _device,
+                        const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
+{
+   TU_FROM_HANDLE(tu_device, device, _device);
+   TU_FROM_HANDLE(tu_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
+   int ret;
+   struct tu_semaphore_part *dst = NULL;
+
+   if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) {
+      dst = &sem->temporary;
+   } else {
+      dst = &sem->permanent;
+   }
+
+   uint32_t syncobj = dst->kind == TU_SEMAPHORE_SYNCOBJ ? dst->syncobj : 0;
+
+   switch (pImportSemaphoreFdInfo->handleType) {
+   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT: {
+      uint32_t old_syncobj = syncobj;
+      ret = drmSyncobjFDToHandle(device->physical_device->local_fd, pImportSemaphoreFdInfo->fd, &syncobj);
+      if (ret == 0) {
+         close(pImportSemaphoreFdInfo->fd);
+         if (old_syncobj)
+            drmSyncobjDestroy(device->physical_device->local_fd, old_syncobj);
+      }
+      break;
+   }
+   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: {
+      if (!syncobj) {
+         ret = drmSyncobjCreate(device->physical_device->local_fd, 0, &syncobj);
+         if (ret)
+            break;
+      }
+      if (pImportSemaphoreFdInfo->fd == -1) {
+         ret = drmSyncobjSignal(device->physical_device->local_fd, &syncobj, 1);
+      } else {
+         ret = drmSyncobjImportSyncFile(device->physical_device->local_fd, syncobj, pImportSemaphoreFdInfo->fd);
+      }
+      if (!ret)
+         close(pImportSemaphoreFdInfo->fd);
+      break;
+   }
+   default:
+      unreachable("Unhandled semaphore handle type");
+   }
+
+   if (ret) {
+      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+   }
+   dst->syncobj = syncobj;
+   dst->kind = TU_SEMAPHORE_SYNCOBJ;
+
+   return VK_SUCCESS;
+}
+
+VkResult
+tu_GetSemaphoreFdKHR(VkDevice _device,
+                     const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
+                     int *pFd)
+{
+   TU_FROM_HANDLE(tu_device, device, _device);
+   TU_FROM_HANDLE(tu_semaphore, sem, pGetFdInfo->semaphore);
+   int ret;
+   uint32_t syncobj_handle;
+
+   if (sem->temporary.kind != TU_SEMAPHORE_NONE) {
+      assert(sem->temporary.kind == TU_SEMAPHORE_SYNCOBJ);
+      syncobj_handle = sem->temporary.syncobj;
+   } else {
+      assert(sem->permanent.kind == TU_SEMAPHORE_SYNCOBJ);
+      syncobj_handle = sem->permanent.syncobj;
+   }
+
+   switch (pGetFdInfo->handleType) {
+   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
+      ret = drmSyncobjHandleToFD(device->physical_device->local_fd, syncobj_handle, pFd);
+      break;
+   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
+      ret = drmSyncobjExportSyncFile(device->physical_device->local_fd, syncobj_handle, pFd);
+      if (!ret) {
+         if (sem->temporary.kind != TU_SEMAPHORE_NONE) {
+            tu_semaphore_part_destroy(device, &sem->temporary);
+         } else {
+            drmSyncobjReset(device->physical_device->local_fd, &syncobj_handle, 1);
+         }
+      }
+      break;
+   default:
+      unreachable("Unhandled semaphore handle type");
+   }
+
+   if (ret)
+      return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+   return VK_SUCCESS;
+}
+
+
+static bool tu_has_syncobj(struct tu_physical_device *pdev)
+{
+   uint64_t value;
+   if (drmGetCap(pdev->local_fd, DRM_CAP_SYNCOBJ, &value))
+      return false;
+   return value && pdev->msm_major_version == 1 && pdev->msm_minor_version >= 6;
+}
+
 void
 tu_GetPhysicalDeviceExternalSemaphoreProperties(
    VkPhysicalDevice physicalDevice,
    const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
    VkExternalSemaphoreProperties *pExternalSemaphoreProperties)
 {
-   pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
-   pExternalSemaphoreProperties->compatibleHandleTypes = 0;
-   pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
+   TU_FROM_HANDLE(tu_physical_device, pdev, physicalDevice);
+
+   if (tu_has_syncobj(pdev) &&
+       (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT ||
+        pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT)) {
+      pExternalSemaphoreProperties->exportFromImportedHandleTypes =
+         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
+         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
+      pExternalSemaphoreProperties->compatibleHandleTypes =
+         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
+         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
+      pExternalSemaphoreProperties->externalSemaphoreFeatures =
+         VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
+         VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
+   } else {
+      pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
+      pExternalSemaphoreProperties->compatibleHandleTypes = 0;
+      pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
+   }
 }
 
 void
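
Review note: the SYNC_FD import path above pairs with temporary
imports — the spec requires sync fds to be imported with
VK_SEMAPHORE_IMPORT_TEMPORARY_BIT, and the temporary payload is
consumed by the next wait, which is why tu_QueueSubmit strips it from
the wait semaphores afterwards. A sketch, with sem and acquire_fd
assumed to exist:

   VkImportSemaphoreFdInfoKHR info = {
      .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
      .semaphore = sem,
      .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
      .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
      .fd = acquire_fd, /* -1 is allowed and means "already signaled" */
   };
   vkImportSemaphoreFdKHR(device, &info);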

diff --git a/src/freedreno/vulkan/tu_extensions.py b/src/freedreno/vulkan/tu_extensions.py
index ab47577150c..ebdcbd3a52d 100644
--- a/src/freedreno/vulkan/tu_extensions.py
+++ b/src/freedreno/vulkan/tu_extensions.py
@@ -75,8 +75,10 @@ EXTENSIONS = [
     Extension('VK_EXT_sampler_filter_minmax',           1, True),
     Extension('VK_EXT_transform_feedback',              1, True),
     Extension('VK_ANDROID_native_buffer',               1, True),
-    Extension('VK_KHR_external_semaphore_fd',           1, True),
     Extension('VK_KHR_external_fence_fd',               1, True),
+    Extension('VK_KHR_external_semaphore',              1, True),
+    Extension('VK_KHR_external_semaphore_capabilities', 1, True),
+    Extension('VK_KHR_external_semaphore_fd',           1, True),
     Extension('VK_IMG_filter_cubic',                    1, 'device->gpu_id == 650'),
     Extension('VK_EXT_filter_cubic',                    1, 'device->gpu_id == 650'),
     Extension('VK_EXT_index_type_uint8',                1, True),
diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h
index b00e891a01a..a70bb3b7dce 100644
--- a/src/freedreno/vulkan/tu_private.h
+++ b/src/freedreno/vulkan/tu_private.h
@@ -220,6 +220,9 @@ struct tu_physical_device
       uint32_t SP_UNKNOWN_A0F8;
    } magic;
 
+   int msm_major_version;
+   int msm_minor_version;
+
    /* This is the drivers on-disk cache used as a fallback as opposed to
    * the pipeline cache defined by apps.
    */
@@ -1436,10 +1439,24 @@ struct tu_query_pool
    struct tu_bo bo;
 };
 
+enum tu_semaphore_kind
+{
+   TU_SEMAPHORE_NONE,
+   TU_SEMAPHORE_SYNCOBJ,
+};
+
+struct tu_semaphore_part
+{
+   enum tu_semaphore_kind kind;
+   union {
+      uint32_t syncobj;
+   };
+};
+
 struct tu_semaphore
 {
-   uint32_t syncobj;
-   uint32_t temp_syncobj;
+   struct tu_semaphore_part permanent;
+   struct tu_semaphore_part temporary;
 };
 
 void
-- 
2.30.2