From 032861693eb311bb1b847b988ac8c395ebadb1a2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 15 Feb 2017 16:48:47 -0800 Subject: [PATCH] anv: Move queues, fences, and semaphores to their own file Things are about to get more complicated, especially as far as semaphores are concerned. Reviewed-by: Chad Versace --- src/intel/Makefile.sources | 1 + src/intel/vulkan/anv_device.c | 484 -------------------------------- src/intel/vulkan/anv_queue.c | 515 ++++++++++++++++++++++++++++++++++ 3 files changed, 516 insertions(+), 484 deletions(-) create mode 100644 src/intel/vulkan/anv_queue.c diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources index 0d446614c62..e9a39a659ac 100644 --- a/src/intel/Makefile.sources +++ b/src/intel/Makefile.sources @@ -202,6 +202,7 @@ VULKAN_FILES := \ vulkan/anv_pipeline.c \ vulkan/anv_pipeline_cache.c \ vulkan/anv_private.h \ + vulkan/anv_queue.c \ vulkan/anv_util.c \ vulkan/anv_wsi.c \ vulkan/vk_format_info.h diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 62974f19d80..cf32df66d2d 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -981,62 +981,6 @@ anv_device_init_border_colors(struct anv_device *device) border_colors); } -VkResult -anv_device_submit_simple_batch(struct anv_device *device, - struct anv_batch *batch) -{ - struct drm_i915_gem_execbuffer2 execbuf; - struct drm_i915_gem_exec_object2 exec2_objects[1]; - struct anv_bo bo, *exec_bos[1]; - VkResult result = VK_SUCCESS; - uint32_t size; - - /* Kernel driver requires 8 byte aligned batch length */ - size = align_u32(batch->next - batch->start, 8); - result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo, size); - if (result != VK_SUCCESS) - return result; - - memcpy(bo.map, batch->start, size); - if (!device->info.has_llc) - anv_flush_range(bo.map, size); - - exec_bos[0] = &bo; - exec2_objects[0].handle = bo.gem_handle; - exec2_objects[0].relocation_count = 0; - exec2_objects[0].relocs_ptr = 0; - 
exec2_objects[0].alignment = 0; - exec2_objects[0].offset = bo.offset; - exec2_objects[0].flags = 0; - exec2_objects[0].rsvd1 = 0; - exec2_objects[0].rsvd2 = 0; - - execbuf.buffers_ptr = (uintptr_t) exec2_objects; - execbuf.buffer_count = 1; - execbuf.batch_start_offset = 0; - execbuf.batch_len = size; - execbuf.cliprects_ptr = 0; - execbuf.num_cliprects = 0; - execbuf.DR1 = 0; - execbuf.DR4 = 0; - - execbuf.flags = - I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; - execbuf.rsvd1 = device->context_id; - execbuf.rsvd2 = 0; - - result = anv_device_execbuf(device, &execbuf, exec_bos); - if (result != VK_SUCCESS) - goto fail; - - result = anv_device_wait(device, &bo, INT64_MAX); - - fail: - anv_bo_pool_free(&device->batch_bo_pool, &bo); - - return result; -} - VkResult anv_CreateDevice( VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, @@ -1349,26 +1293,6 @@ void anv_GetDeviceQueue( *pQueue = anv_queue_to_handle(&device->queue); } -VkResult -anv_device_execbuf(struct anv_device *device, - struct drm_i915_gem_execbuffer2 *execbuf, - struct anv_bo **execbuf_bos) -{ - int ret = anv_gem_execbuffer(device, execbuf); - if (ret != 0) { - /* We don't know the real error. 
*/ - device->lost = true; - return vk_errorf(VK_ERROR_DEVICE_LOST, "execbuf2 failed: %m"); - } - - struct drm_i915_gem_exec_object2 *objects = - (void *)(uintptr_t)execbuf->buffers_ptr; - for (uint32_t k = 0; k < execbuf->buffer_count; k++) - execbuf_bos[k]->offset = objects[k].offset; - - return VK_SUCCESS; -} - VkResult anv_device_query_status(struct anv_device *device) { @@ -1446,119 +1370,6 @@ anv_device_wait(struct anv_device *device, struct anv_bo *bo, return anv_device_query_status(device); } -VkResult anv_QueueSubmit( - VkQueue _queue, - uint32_t submitCount, - const VkSubmitInfo* pSubmits, - VkFence _fence) -{ - ANV_FROM_HANDLE(anv_queue, queue, _queue); - ANV_FROM_HANDLE(anv_fence, fence, _fence); - struct anv_device *device = queue->device; - - /* Query for device status prior to submitting. Technically, we don't need - * to do this. However, if we have a client that's submitting piles of - * garbage, we would rather break as early as possible to keep the GPU - * hanging contained. If we don't check here, we'll either be waiting for - * the kernel to kick us or we'll have to wait until the client waits on a - * fence before we actually know whether or not we've hung. - */ - VkResult result = anv_device_query_status(device); - if (result != VK_SUCCESS) - return result; - - /* We lock around QueueSubmit for three main reasons: - * - * 1) When a block pool is resized, we create a new gem handle with a - * different size and, in the case of surface states, possibly a - * different center offset but we re-use the same anv_bo struct when - * we do so. If this happens in the middle of setting up an execbuf, - * we could end up with our list of BOs out of sync with our list of - * gem handles. - * - * 2) The algorithm we use for building the list of unique buffers isn't - * thread-safe. While the client is supposed to syncronize around - * QueueSubmit, this would be extremely difficult to debug if it ever - * came up in the wild due to a broken app. 
It's better to play it - * safe and just lock around QueueSubmit. - * - * 3) The anv_cmd_buffer_execbuf function may perform relocations in - * userspace. Due to the fact that the surface state buffer is shared - * between batches, we can't afford to have that happen from multiple - * threads at the same time. Even though the user is supposed to - * ensure this doesn't happen, we play it safe as in (2) above. - * - * Since the only other things that ever take the device lock such as block - * pool resize only rarely happen, this will almost never be contended so - * taking a lock isn't really an expensive operation in this case. - */ - pthread_mutex_lock(&device->mutex); - - for (uint32_t i = 0; i < submitCount; i++) { - for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, - pSubmits[i].pCommandBuffers[j]); - assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); - assert(!anv_batch_has_error(&cmd_buffer->batch)); - - result = anv_cmd_buffer_execbuf(device, cmd_buffer); - if (result != VK_SUCCESS) - goto out; - } - } - - if (fence) { - struct anv_bo *fence_bo = &fence->bo; - result = anv_device_execbuf(device, &fence->execbuf, &fence_bo); - if (result != VK_SUCCESS) - goto out; - - /* Update the fence and wake up any waiters */ - assert(fence->state == ANV_FENCE_STATE_RESET); - fence->state = ANV_FENCE_STATE_SUBMITTED; - pthread_cond_broadcast(&device->queue_submit); - } - -out: - if (result != VK_SUCCESS) { - /* In the case that something has gone wrong we may end up with an - * inconsistent state from which it may not be trivial to recover. - * For example, we might have computed address relocations and - * any future attempt to re-submit this job will need to know about - * this and avoid computing relocation addresses again. 
- * - * To avoid this sort of issues, we assume that if something was - * wrong during submission we must already be in a really bad situation - * anyway (such us being out of memory) and return - * VK_ERROR_DEVICE_LOST to ensure that clients do not attempt to - * submit the same job again to this device. - */ - result = VK_ERROR_DEVICE_LOST; - device->lost = true; - - /* If we return VK_ERROR_DEVICE LOST here, we need to ensure that - * vkWaitForFences() and vkGetFenceStatus() return a valid result - * (VK_SUCCESS or VK_ERROR_DEVICE_LOST) in a finite amount of time. - * Setting the fence status to SIGNALED ensures this will happen in - * any case. - */ - if (fence) - fence->state = ANV_FENCE_STATE_SIGNALED; - } - - pthread_mutex_unlock(&device->mutex); - - return result; -} - -VkResult anv_QueueWaitIdle( - VkQueue _queue) -{ - ANV_FROM_HANDLE(anv_queue, queue, _queue); - - return anv_DeviceWaitIdle(anv_device_to_handle(queue->device)); -} - VkResult anv_DeviceWaitIdle( VkDevice _device) { @@ -1953,301 +1764,6 @@ VkResult anv_QueueBindSparse( return vk_error(VK_ERROR_FEATURE_NOT_PRESENT); } -VkResult anv_CreateFence( - VkDevice _device, - const VkFenceCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkFence* pFence) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_bo fence_bo; - struct anv_fence *fence; - struct anv_batch batch; - VkResult result; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO); - - result = anv_bo_pool_alloc(&device->batch_bo_pool, &fence_bo, 4096); - if (result != VK_SUCCESS) - return result; - - /* Fences are small. Just store the CPU data structure in the BO. */ - fence = fence_bo.map; - fence->bo = fence_bo; - - /* Place the batch after the CPU data but on its own cache line. 
*/ - const uint32_t batch_offset = align_u32(sizeof(*fence), CACHELINE_SIZE); - batch.next = batch.start = fence->bo.map + batch_offset; - batch.end = fence->bo.map + fence->bo.size; - anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe); - anv_batch_emit(&batch, GEN7_MI_NOOP, noop); - - if (!device->info.has_llc) { - assert(((uintptr_t) batch.start & CACHELINE_MASK) == 0); - assert(batch.next - batch.start <= CACHELINE_SIZE); - __builtin_ia32_mfence(); - __builtin_ia32_clflush(batch.start); - } - - fence->exec2_objects[0].handle = fence->bo.gem_handle; - fence->exec2_objects[0].relocation_count = 0; - fence->exec2_objects[0].relocs_ptr = 0; - fence->exec2_objects[0].alignment = 0; - fence->exec2_objects[0].offset = fence->bo.offset; - fence->exec2_objects[0].flags = 0; - fence->exec2_objects[0].rsvd1 = 0; - fence->exec2_objects[0].rsvd2 = 0; - - fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects; - fence->execbuf.buffer_count = 1; - fence->execbuf.batch_start_offset = batch.start - fence->bo.map; - fence->execbuf.batch_len = batch.next - batch.start; - fence->execbuf.cliprects_ptr = 0; - fence->execbuf.num_cliprects = 0; - fence->execbuf.DR1 = 0; - fence->execbuf.DR4 = 0; - - fence->execbuf.flags = - I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; - fence->execbuf.rsvd1 = device->context_id; - fence->execbuf.rsvd2 = 0; - - if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) { - fence->state = ANV_FENCE_STATE_SIGNALED; - } else { - fence->state = ANV_FENCE_STATE_RESET; - } - - *pFence = anv_fence_to_handle(fence); - - return VK_SUCCESS; -} - -void anv_DestroyFence( - VkDevice _device, - VkFence _fence, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_fence, fence, _fence); - - if (!fence) - return; - - assert(fence->bo.map == fence); - anv_bo_pool_free(&device->batch_bo_pool, &fence->bo); -} - -VkResult anv_ResetFences( - VkDevice _device, - uint32_t fenceCount, - const 
VkFence* pFences) -{ - for (uint32_t i = 0; i < fenceCount; i++) { - ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); - fence->state = ANV_FENCE_STATE_RESET; - } - - return VK_SUCCESS; -} - -VkResult anv_GetFenceStatus( - VkDevice _device, - VkFence _fence) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_fence, fence, _fence); - - if (unlikely(device->lost)) - return VK_ERROR_DEVICE_LOST; - - switch (fence->state) { - case ANV_FENCE_STATE_RESET: - /* If it hasn't even been sent off to the GPU yet, it's not ready */ - return VK_NOT_READY; - - case ANV_FENCE_STATE_SIGNALED: - /* It's been signaled, return success */ - return VK_SUCCESS; - - case ANV_FENCE_STATE_SUBMITTED: { - VkResult result = anv_device_bo_busy(device, &fence->bo); - if (result == VK_SUCCESS) { - fence->state = ANV_FENCE_STATE_SIGNALED; - return VK_SUCCESS; - } else { - return result; - } - } - default: - unreachable("Invalid fence status"); - } -} - -#define NSEC_PER_SEC 1000000000 -#define INT_TYPE_MAX(type) ((1ull << (sizeof(type) * 8 - 1)) - 1) - -VkResult anv_WaitForFences( - VkDevice _device, - uint32_t fenceCount, - const VkFence* pFences, - VkBool32 waitAll, - uint64_t _timeout) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - int ret; - - if (unlikely(device->lost)) - return VK_ERROR_DEVICE_LOST; - - /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed - * to block indefinitely timeouts <= 0. Unfortunately, this was broken - * for a couple of kernel releases. Since there's no way to know - * whether or not the kernel we're using is one of the broken ones, the - * best we can do is to clamp the timeout to INT64_MAX. This limits the - * maximum timeout from 584 years to 292 years - likely not a big deal. 
- */ - int64_t timeout = MIN2(_timeout, INT64_MAX); - - VkResult result = VK_SUCCESS; - uint32_t pending_fences = fenceCount; - while (pending_fences) { - pending_fences = 0; - bool signaled_fences = false; - for (uint32_t i = 0; i < fenceCount; i++) { - ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); - switch (fence->state) { - case ANV_FENCE_STATE_RESET: - /* This fence hasn't been submitted yet, we'll catch it the next - * time around. Yes, this may mean we dead-loop but, short of - * lots of locking and a condition variable, there's not much that - * we can do about that. - */ - pending_fences++; - continue; - - case ANV_FENCE_STATE_SIGNALED: - /* This fence is not pending. If waitAll isn't set, we can return - * early. Otherwise, we have to keep going. - */ - if (!waitAll) { - result = VK_SUCCESS; - goto done; - } - continue; - - case ANV_FENCE_STATE_SUBMITTED: - /* These are the fences we really care about. Go ahead and wait - * on it until we hit a timeout. - */ - result = anv_device_wait(device, &fence->bo, timeout); - switch (result) { - case VK_SUCCESS: - fence->state = ANV_FENCE_STATE_SIGNALED; - signaled_fences = true; - if (!waitAll) - goto done; - break; - - case VK_TIMEOUT: - goto done; - - default: - return result; - } - } - } - - if (pending_fences && !signaled_fences) { - /* If we've hit this then someone decided to vkWaitForFences before - * they've actually submitted any of them to a queue. This is a - * fairly pessimal case, so it's ok to lock here and use a standard - * pthreads condition variable. - */ - pthread_mutex_lock(&device->mutex); - - /* It's possible that some of the fences have changed state since the - * last time we checked. Now that we have the lock, check for - * pending fences again and don't wait if it's changed. 
- */ - uint32_t now_pending_fences = 0; - for (uint32_t i = 0; i < fenceCount; i++) { - ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); - if (fence->state == ANV_FENCE_STATE_RESET) - now_pending_fences++; - } - assert(now_pending_fences <= pending_fences); - - if (now_pending_fences == pending_fences) { - struct timespec before; - clock_gettime(CLOCK_MONOTONIC, &before); - - uint32_t abs_nsec = before.tv_nsec + timeout % NSEC_PER_SEC; - uint64_t abs_sec = before.tv_sec + (abs_nsec / NSEC_PER_SEC) + - (timeout / NSEC_PER_SEC); - abs_nsec %= NSEC_PER_SEC; - - /* Avoid roll-over in tv_sec on 32-bit systems if the user - * provided timeout is UINT64_MAX - */ - struct timespec abstime; - abstime.tv_nsec = abs_nsec; - abstime.tv_sec = MIN2(abs_sec, INT_TYPE_MAX(abstime.tv_sec)); - - ret = pthread_cond_timedwait(&device->queue_submit, - &device->mutex, &abstime); - assert(ret != EINVAL); - - struct timespec after; - clock_gettime(CLOCK_MONOTONIC, &after); - uint64_t time_elapsed = - ((uint64_t)after.tv_sec * NSEC_PER_SEC + after.tv_nsec) - - ((uint64_t)before.tv_sec * NSEC_PER_SEC + before.tv_nsec); - - if (time_elapsed >= timeout) { - pthread_mutex_unlock(&device->mutex); - result = VK_TIMEOUT; - goto done; - } - - timeout -= time_elapsed; - } - - pthread_mutex_unlock(&device->mutex); - } - } - -done: - if (unlikely(device->lost)) - return VK_ERROR_DEVICE_LOST; - - return result; -} - -// Queue semaphore functions - -VkResult anv_CreateSemaphore( - VkDevice device, - const VkSemaphoreCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSemaphore* pSemaphore) -{ - /* The DRM execbuffer ioctl always execute in-oder, even between different - * rings. As such, there's nothing to do for the user space semaphore. 
- */ - - *pSemaphore = (VkSemaphore)1; - - return VK_SUCCESS; -} - -void anv_DestroySemaphore( - VkDevice device, - VkSemaphore semaphore, - const VkAllocationCallbacks* pAllocator) -{ -} - // Event functions VkResult anv_CreateEvent( diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c new file mode 100644 index 00000000000..5a22ff7fe60 --- /dev/null +++ b/src/intel/vulkan/anv_queue.c @@ -0,0 +1,515 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +/** + * This file implements VkQueue, VkFence, and VkSemaphore + */ + +#include "anv_private.h" +#include "util/vk_util.h" + +#include "genxml/gen7_pack.h" + +VkResult +anv_device_execbuf(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf, + struct anv_bo **execbuf_bos) +{ + int ret = anv_gem_execbuffer(device, execbuf); + if (ret != 0) { + /* We don't know the real error. */ + device->lost = true; + return vk_errorf(VK_ERROR_DEVICE_LOST, "execbuf2 failed: %m"); + } + + struct drm_i915_gem_exec_object2 *objects = + (void *)(uintptr_t)execbuf->buffers_ptr; + for (uint32_t k = 0; k < execbuf->buffer_count; k++) + execbuf_bos[k]->offset = objects[k].offset; + + return VK_SUCCESS; +} + +VkResult +anv_device_submit_simple_batch(struct anv_device *device, + struct anv_batch *batch) +{ + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 exec2_objects[1]; + struct anv_bo bo, *exec_bos[1]; + VkResult result = VK_SUCCESS; + uint32_t size; + + /* Kernel driver requires 8 byte aligned batch length */ + size = align_u32(batch->next - batch->start, 8); + result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo, size); + if (result != VK_SUCCESS) + return result; + + memcpy(bo.map, batch->start, size); + if (!device->info.has_llc) + anv_flush_range(bo.map, size); + + exec_bos[0] = &bo; + exec2_objects[0].handle = bo.gem_handle; + exec2_objects[0].relocation_count = 0; + exec2_objects[0].relocs_ptr = 0; + exec2_objects[0].alignment = 0; + exec2_objects[0].offset = bo.offset; + exec2_objects[0].flags = 0; + exec2_objects[0].rsvd1 = 0; + exec2_objects[0].rsvd2 = 0; + + execbuf.buffers_ptr = (uintptr_t) exec2_objects; + execbuf.buffer_count = 1; + execbuf.batch_start_offset = 0; + execbuf.batch_len = size; + execbuf.cliprects_ptr = 0; + execbuf.num_cliprects = 0; + execbuf.DR1 = 0; + execbuf.DR4 = 0; + + execbuf.flags = + I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; + execbuf.rsvd1 = device->context_id; + 
execbuf.rsvd2 = 0; + + result = anv_device_execbuf(device, &execbuf, exec_bos); + if (result != VK_SUCCESS) + goto fail; + + result = anv_device_wait(device, &bo, INT64_MAX); + + fail: + anv_bo_pool_free(&device->batch_bo_pool, &bo); + + return result; +} + +VkResult anv_QueueSubmit( + VkQueue _queue, + uint32_t submitCount, + const VkSubmitInfo* pSubmits, + VkFence _fence) +{ + ANV_FROM_HANDLE(anv_queue, queue, _queue); + ANV_FROM_HANDLE(anv_fence, fence, _fence); + struct anv_device *device = queue->device; + + /* Query for device status prior to submitting. Technically, we don't need + * to do this. However, if we have a client that's submitting piles of + * garbage, we would rather break as early as possible to keep the GPU + * hanging contained. If we don't check here, we'll either be waiting for + * the kernel to kick us or we'll have to wait until the client waits on a + * fence before we actually know whether or not we've hung. + */ + VkResult result = anv_device_query_status(device); + if (result != VK_SUCCESS) + return result; + + /* We lock around QueueSubmit for three main reasons: + * + * 1) When a block pool is resized, we create a new gem handle with a + * different size and, in the case of surface states, possibly a + * different center offset but we re-use the same anv_bo struct when + * we do so. If this happens in the middle of setting up an execbuf, + * we could end up with our list of BOs out of sync with our list of + * gem handles. + * + * 2) The algorithm we use for building the list of unique buffers isn't + * thread-safe. While the client is supposed to syncronize around + * QueueSubmit, this would be extremely difficult to debug if it ever + * came up in the wild due to a broken app. It's better to play it + * safe and just lock around QueueSubmit. + * + * 3) The anv_cmd_buffer_execbuf function may perform relocations in + * userspace. 
Due to the fact that the surface state buffer is shared + * between batches, we can't afford to have that happen from multiple + * threads at the same time. Even though the user is supposed to + * ensure this doesn't happen, we play it safe as in (2) above. + * + * Since the only other things that ever take the device lock such as block + * pool resize only rarely happen, this will almost never be contended so + * taking a lock isn't really an expensive operation in this case. + */ + pthread_mutex_lock(&device->mutex); + + for (uint32_t i = 0; i < submitCount; i++) { + for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) { + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, + pSubmits[i].pCommandBuffers[j]); + assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); + assert(!anv_batch_has_error(&cmd_buffer->batch)); + + result = anv_cmd_buffer_execbuf(device, cmd_buffer); + if (result != VK_SUCCESS) + goto out; + } + } + + if (fence) { + struct anv_bo *fence_bo = &fence->bo; + result = anv_device_execbuf(device, &fence->execbuf, &fence_bo); + if (result != VK_SUCCESS) + goto out; + + /* Update the fence and wake up any waiters */ + assert(fence->state == ANV_FENCE_STATE_RESET); + fence->state = ANV_FENCE_STATE_SUBMITTED; + pthread_cond_broadcast(&device->queue_submit); + } + +out: + if (result != VK_SUCCESS) { + /* In the case that something has gone wrong we may end up with an + * inconsistent state from which it may not be trivial to recover. + * For example, we might have computed address relocations and + * any future attempt to re-submit this job will need to know about + * this and avoid computing relocation addresses again. + * + * To avoid this sort of issues, we assume that if something was + * wrong during submission we must already be in a really bad situation + * anyway (such us being out of memory) and return + * VK_ERROR_DEVICE_LOST to ensure that clients do not attempt to + * submit the same job again to this device. 
+ */ + result = VK_ERROR_DEVICE_LOST; + device->lost = true; + + /* If we return VK_ERROR_DEVICE LOST here, we need to ensure that + * vkWaitForFences() and vkGetFenceStatus() return a valid result + * (VK_SUCCESS or VK_ERROR_DEVICE_LOST) in a finite amount of time. + * Setting the fence status to SIGNALED ensures this will happen in + * any case. + */ + if (fence) + fence->state = ANV_FENCE_STATE_SIGNALED; + } + + pthread_mutex_unlock(&device->mutex); + + return result; +} + +VkResult anv_QueueWaitIdle( + VkQueue _queue) +{ + ANV_FROM_HANDLE(anv_queue, queue, _queue); + + return anv_DeviceWaitIdle(anv_device_to_handle(queue->device)); +} + +VkResult anv_CreateFence( + VkDevice _device, + const VkFenceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkFence* pFence) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_bo fence_bo; + struct anv_fence *fence; + struct anv_batch batch; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO); + + result = anv_bo_pool_alloc(&device->batch_bo_pool, &fence_bo, 4096); + if (result != VK_SUCCESS) + return result; + + /* Fences are small. Just store the CPU data structure in the BO. */ + fence = fence_bo.map; + fence->bo = fence_bo; + + /* Place the batch after the CPU data but on its own cache line. 
*/ + const uint32_t batch_offset = align_u32(sizeof(*fence), CACHELINE_SIZE); + batch.next = batch.start = fence->bo.map + batch_offset; + batch.end = fence->bo.map + fence->bo.size; + anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe); + anv_batch_emit(&batch, GEN7_MI_NOOP, noop); + + if (!device->info.has_llc) { + assert(((uintptr_t) batch.start & CACHELINE_MASK) == 0); + assert(batch.next - batch.start <= CACHELINE_SIZE); + __builtin_ia32_mfence(); + __builtin_ia32_clflush(batch.start); + } + + fence->exec2_objects[0].handle = fence->bo.gem_handle; + fence->exec2_objects[0].relocation_count = 0; + fence->exec2_objects[0].relocs_ptr = 0; + fence->exec2_objects[0].alignment = 0; + fence->exec2_objects[0].offset = fence->bo.offset; + fence->exec2_objects[0].flags = 0; + fence->exec2_objects[0].rsvd1 = 0; + fence->exec2_objects[0].rsvd2 = 0; + + fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects; + fence->execbuf.buffer_count = 1; + fence->execbuf.batch_start_offset = batch.start - fence->bo.map; + fence->execbuf.batch_len = batch.next - batch.start; + fence->execbuf.cliprects_ptr = 0; + fence->execbuf.num_cliprects = 0; + fence->execbuf.DR1 = 0; + fence->execbuf.DR4 = 0; + + fence->execbuf.flags = + I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; + fence->execbuf.rsvd1 = device->context_id; + fence->execbuf.rsvd2 = 0; + + if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) { + fence->state = ANV_FENCE_STATE_SIGNALED; + } else { + fence->state = ANV_FENCE_STATE_RESET; + } + + *pFence = anv_fence_to_handle(fence); + + return VK_SUCCESS; +} + +void anv_DestroyFence( + VkDevice _device, + VkFence _fence, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_fence, fence, _fence); + + if (!fence) + return; + + assert(fence->bo.map == fence); + anv_bo_pool_free(&device->batch_bo_pool, &fence->bo); +} + +VkResult anv_ResetFences( + VkDevice _device, + uint32_t fenceCount, + const 
VkFence* pFences) +{ + for (uint32_t i = 0; i < fenceCount; i++) { + ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); + fence->state = ANV_FENCE_STATE_RESET; + } + + return VK_SUCCESS; +} + +VkResult anv_GetFenceStatus( + VkDevice _device, + VkFence _fence) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_fence, fence, _fence); + + if (unlikely(device->lost)) + return VK_ERROR_DEVICE_LOST; + + switch (fence->state) { + case ANV_FENCE_STATE_RESET: + /* If it hasn't even been sent off to the GPU yet, it's not ready */ + return VK_NOT_READY; + + case ANV_FENCE_STATE_SIGNALED: + /* It's been signaled, return success */ + return VK_SUCCESS; + + case ANV_FENCE_STATE_SUBMITTED: { + VkResult result = anv_device_bo_busy(device, &fence->bo); + if (result == VK_SUCCESS) { + fence->state = ANV_FENCE_STATE_SIGNALED; + return VK_SUCCESS; + } else { + return result; + } + } + default: + unreachable("Invalid fence status"); + } +} + +#define NSEC_PER_SEC 1000000000 +#define INT_TYPE_MAX(type) ((1ull << (sizeof(type) * 8 - 1)) - 1) + +VkResult anv_WaitForFences( + VkDevice _device, + uint32_t fenceCount, + const VkFence* pFences, + VkBool32 waitAll, + uint64_t _timeout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + int ret; + + if (unlikely(device->lost)) + return VK_ERROR_DEVICE_LOST; + + /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed + * to block indefinitely timeouts <= 0. Unfortunately, this was broken + * for a couple of kernel releases. Since there's no way to know + * whether or not the kernel we're using is one of the broken ones, the + * best we can do is to clamp the timeout to INT64_MAX. This limits the + * maximum timeout from 584 years to 292 years - likely not a big deal. 
+ */ + int64_t timeout = MIN2(_timeout, INT64_MAX); + + VkResult result = VK_SUCCESS; + uint32_t pending_fences = fenceCount; + while (pending_fences) { + pending_fences = 0; + bool signaled_fences = false; + for (uint32_t i = 0; i < fenceCount; i++) { + ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); + switch (fence->state) { + case ANV_FENCE_STATE_RESET: + /* This fence hasn't been submitted yet, we'll catch it the next + * time around. Yes, this may mean we dead-loop but, short of + * lots of locking and a condition variable, there's not much that + * we can do about that. + */ + pending_fences++; + continue; + + case ANV_FENCE_STATE_SIGNALED: + /* This fence is not pending. If waitAll isn't set, we can return + * early. Otherwise, we have to keep going. + */ + if (!waitAll) { + result = VK_SUCCESS; + goto done; + } + continue; + + case ANV_FENCE_STATE_SUBMITTED: + /* These are the fences we really care about. Go ahead and wait + * on it until we hit a timeout. + */ + result = anv_device_wait(device, &fence->bo, timeout); + switch (result) { + case VK_SUCCESS: + fence->state = ANV_FENCE_STATE_SIGNALED; + signaled_fences = true; + if (!waitAll) + goto done; + break; + + case VK_TIMEOUT: + goto done; + + default: + return result; + } + } + } + + if (pending_fences && !signaled_fences) { + /* If we've hit this then someone decided to vkWaitForFences before + * they've actually submitted any of them to a queue. This is a + * fairly pessimal case, so it's ok to lock here and use a standard + * pthreads condition variable. + */ + pthread_mutex_lock(&device->mutex); + + /* It's possible that some of the fences have changed state since the + * last time we checked. Now that we have the lock, check for + * pending fences again and don't wait if it's changed. 
+ */ + uint32_t now_pending_fences = 0; + for (uint32_t i = 0; i < fenceCount; i++) { + ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); + if (fence->state == ANV_FENCE_STATE_RESET) + now_pending_fences++; + } + assert(now_pending_fences <= pending_fences); + + if (now_pending_fences == pending_fences) { + struct timespec before; + clock_gettime(CLOCK_MONOTONIC, &before); + + uint32_t abs_nsec = before.tv_nsec + timeout % NSEC_PER_SEC; + uint64_t abs_sec = before.tv_sec + (abs_nsec / NSEC_PER_SEC) + + (timeout / NSEC_PER_SEC); + abs_nsec %= NSEC_PER_SEC; + + /* Avoid roll-over in tv_sec on 32-bit systems if the user + * provided timeout is UINT64_MAX + */ + struct timespec abstime; + abstime.tv_nsec = abs_nsec; + abstime.tv_sec = MIN2(abs_sec, INT_TYPE_MAX(abstime.tv_sec)); + + ret = pthread_cond_timedwait(&device->queue_submit, + &device->mutex, &abstime); + assert(ret != EINVAL); + + struct timespec after; + clock_gettime(CLOCK_MONOTONIC, &after); + uint64_t time_elapsed = + ((uint64_t)after.tv_sec * NSEC_PER_SEC + after.tv_nsec) - + ((uint64_t)before.tv_sec * NSEC_PER_SEC + before.tv_nsec); + + if (time_elapsed >= timeout) { + pthread_mutex_unlock(&device->mutex); + result = VK_TIMEOUT; + goto done; + } + + timeout -= time_elapsed; + } + + pthread_mutex_unlock(&device->mutex); + } + } + +done: + if (unlikely(device->lost)) + return VK_ERROR_DEVICE_LOST; + + return result; +} + +// Queue semaphore functions + +VkResult anv_CreateSemaphore( + VkDevice device, + const VkSemaphoreCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSemaphore* pSemaphore) +{ + /* The DRM execbuffer ioctl always execute in-oder, even between different + * rings. As such, there's nothing to do for the user space semaphore. + */ + + *pSemaphore = (VkSemaphore)1; + + return VK_SUCCESS; +} + +void anv_DestroySemaphore( + VkDevice device, + VkSemaphore semaphore, + const VkAllocationCallbacks* pAllocator) +{ +} -- 2.30.2