From: Jason Ekstrand Date: Thu, 3 Aug 2017 18:46:09 +0000 (-0700) Subject: anv: Rework fences to work more like BO semaphores X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=f992bb205c174e0a8e6574258598f5268dd0e0cb;p=mesa.git anv: Rework fences to work more like BO semaphores This commit changes fences to work a bit more like BO semaphores. Instead of the fence being a batch, it's simply a BO that gets added to the validation list for the last execbuf call in the QueueSubmit operation. It's a bit annoying finding the last submit in the execbuf but this allows us to avoid the dummy execbuf. Reviewed-by: Lionel Landwerlin --- diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c index 1e7455f71e1..ef6ada49ff3 100644 --- a/src/intel/vulkan/anv_batch_chain.c +++ b/src/intel/vulkan/anv_batch_chain.c @@ -1451,8 +1451,11 @@ anv_cmd_buffer_execbuf(struct anv_device *device, const VkSemaphore *in_semaphores, uint32_t num_in_semaphores, const VkSemaphore *out_semaphores, - uint32_t num_out_semaphores) + uint32_t num_out_semaphores, + VkFence _fence) { + ANV_FROM_HANDLE(anv_fence, fence, _fence); + struct anv_execbuf execbuf; anv_execbuf_init(&execbuf); @@ -1545,6 +1548,13 @@ anv_cmd_buffer_execbuf(struct anv_device *device, } } + if (fence) { + result = anv_execbuf_add_bo(&execbuf, &fence->bo, NULL, + EXEC_OBJECT_WRITE, &device->alloc); + if (result != VK_SUCCESS) + return result; + } + if (cmd_buffer) result = setup_execbuf_for_cmd_buffer(&execbuf, cmd_buffer); else @@ -1588,6 +1598,20 @@ anv_cmd_buffer_execbuf(struct anv_device *device, anv_semaphore_reset_temporary(device, semaphore); } + if (fence) { + /* Once the execbuf has returned, we need to set the fence state to + * SUBMITTED. We can't do this before calling execbuf because + * anv_GetFenceStatus does take the global device lock before checking + * fence->state. + * + * We set the fence state to SUBMITTED regardless of whether or not the + * execbuf succeeds because we need to ensure that vkWaitForFences() and + * vkGetFenceStatus() return a valid result (VK_ERROR_DEVICE_LOST or + * VK_SUCCESS) in a finite amount of time even if execbuf fails. + */ + fence->state = ANV_FENCE_STATE_SUBMITTED; + } + if (result == VK_SUCCESS && need_out_fence) { int out_fence = execbuf.execbuf.rsvd2 >> 32; for (uint32_t i = 0; i < num_out_semaphores; i++) { diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 6b2414429f3..715e0adebfb 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1642,7 +1642,8 @@ VkResult anv_cmd_buffer_execbuf(struct anv_device *device, const VkSemaphore *in_semaphores, uint32_t num_in_semaphores, const VkSemaphore *out_semaphores, - uint32_t num_out_semaphores); + uint32_t num_out_semaphores, + VkFence fence); VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer); @@ -1720,8 +1721,6 @@ enum anv_fence_state { struct anv_fence { struct anv_bo bo; - struct drm_i915_gem_execbuffer2 execbuf; - struct drm_i915_gem_exec_object2 exec2_objects[1]; enum anv_fence_state state; }; diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c index 03769beccdd..5023172250a 100644 --- a/src/intel/vulkan/anv_queue.c +++ b/src/intel/vulkan/anv_queue.c @@ -114,10 +114,9 @@ VkResult anv_QueueSubmit( VkQueue _queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, - VkFence _fence) + VkFence fence) { ANV_FROM_HANDLE(anv_queue, queue, _queue); - ANV_FROM_HANDLE(anv_fence, fence, _fence); struct anv_device *device = queue->device; /* Query for device status prior to submitting. Technically, we don't need @@ -158,7 +157,20 @@ VkResult anv_QueueSubmit( */ pthread_mutex_lock(&device->mutex); + if (fence && submitCount == 0) { + /* If we don't have any command buffers, we need to submit a dummy + * batch to give GEM something to wait on. We could, potentially, + * come up with something more efficient but this shouldn't be a + * common case. + */ + result = anv_cmd_buffer_execbuf(device, NULL, NULL, 0, NULL, 0, fence); + goto out; + } + for (uint32_t i = 0; i < submitCount; i++) { + /* Fence for this submit. NULL for all but the last one */ + VkFence submit_fence = (i == submitCount - 1) ? fence : NULL; + if (pSubmits[i].commandBufferCount == 0) { /* If we don't have any command buffers, we need to submit a dummy * batch to give GEM something to wait on. We could, potentially, @@ -169,7 +181,8 @@ VkResult anv_QueueSubmit( pSubmits[i].pWaitSemaphores, pSubmits[i].waitSemaphoreCount, pSubmits[i].pSignalSemaphores, - pSubmits[i].signalSemaphoreCount); + pSubmits[i].signalSemaphoreCount, + submit_fence); if (result != VK_SUCCESS) goto out; @@ -182,6 +195,10 @@ VkResult anv_QueueSubmit( assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); assert(!anv_batch_has_error(&cmd_buffer->batch)); + /* Fence for this execbuf. NULL for all but the last one */ + VkFence execbuf_fence = + (j == pSubmits[i].commandBufferCount - 1) ? submit_fence : NULL; + const VkSemaphore *in_semaphores = NULL, *out_semaphores = NULL; uint32_t num_in_semaphores = 0, num_out_semaphores = 0; if (j == 0) { @@ -198,23 +215,14 @@ VkResult anv_QueueSubmit( result = anv_cmd_buffer_execbuf(device, cmd_buffer, in_semaphores, num_in_semaphores, - out_semaphores, num_out_semaphores); + out_semaphores, num_out_semaphores, + execbuf_fence); if (result != VK_SUCCESS) goto out; } } - if (fence) { - struct anv_bo *fence_bo = &fence->bo; - result = anv_device_execbuf(device, &fence->execbuf, &fence_bo); - if (result != VK_SUCCESS) - goto out; - - /* Update the fence and wake up any waiters */ - assert(fence->state == ANV_FENCE_STATE_RESET); - fence->state = ANV_FENCE_STATE_SUBMITTED; - pthread_cond_broadcast(&device->queue_submit); - } + pthread_cond_broadcast(&device->queue_submit); out: if (result != VK_SUCCESS) { @@ -232,15 +240,6 @@ out: */ result = vk_errorf(VK_ERROR_DEVICE_LOST, "vkQueueSubmit() failed"); device->lost = true; - - /* If we return VK_ERROR_DEVICE LOST here, we need to ensure that - * vkWaitForFences() and vkGetFenceStatus() return a valid result - * (VK_SUCCESS or VK_ERROR_DEVICE_LOST) in a finite amount of time. - * Setting the fence status to SIGNALED ensures this will happen in - * any case. - */ - if (fence) - fence->state = ANV_FENCE_STATE_SIGNALED; } pthread_mutex_unlock(&device->mutex); @@ -265,12 +264,10 @@ VkResult anv_CreateFence( ANV_FROM_HANDLE(anv_device, device, _device); struct anv_bo fence_bo; struct anv_fence *fence; - struct anv_batch batch; - VkResult result; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO); - result = anv_bo_pool_alloc(&device->batch_bo_pool, &fence_bo, 4096); + VkResult result = anv_bo_pool_alloc(&device->batch_bo_pool, &fence_bo, 4096); if (result != VK_SUCCESS) return result; @@ -278,43 +275,6 @@ VkResult anv_CreateFence( fence = fence_bo.map; fence->bo = fence_bo; - /* Place the batch after the CPU data but on its own cache line. */ - const uint32_t batch_offset = align_u32(sizeof(*fence), CACHELINE_SIZE); - batch.next = batch.start = fence->bo.map + batch_offset; - batch.end = fence->bo.map + fence->bo.size; - anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe); - anv_batch_emit(&batch, GEN7_MI_NOOP, noop); - - if (!device->info.has_llc) { - assert(((uintptr_t) batch.start & CACHELINE_MASK) == 0); - assert(batch.next - batch.start <= CACHELINE_SIZE); - __builtin_ia32_mfence(); - __builtin_ia32_clflush(batch.start); - } - - fence->exec2_objects[0].handle = fence->bo.gem_handle; - fence->exec2_objects[0].relocation_count = 0; - fence->exec2_objects[0].relocs_ptr = 0; - fence->exec2_objects[0].alignment = 0; - fence->exec2_objects[0].offset = fence->bo.offset; - fence->exec2_objects[0].flags = 0; - fence->exec2_objects[0].rsvd1 = 0; - fence->exec2_objects[0].rsvd2 = 0; - - fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects; - fence->execbuf.buffer_count = 1; - fence->execbuf.batch_start_offset = batch.start - fence->bo.map; - fence->execbuf.batch_len = batch.next - batch.start; - fence->execbuf.cliprects_ptr = 0; - fence->execbuf.num_cliprects = 0; - fence->execbuf.DR1 = 0; - fence->execbuf.DR4 = 0; - - fence->execbuf.flags = - I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; - fence->execbuf.rsvd1 = device->context_id; - fence->execbuf.rsvd2 = 0; - if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) { fence->state = ANV_FENCE_STATE_SIGNALED; } else {