From c121739c4772a9442bc6866abae9306773abe3de Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Mon, 20 Feb 2017 09:08:31 +0100 Subject: [PATCH] radv: Special case the initial preamble. For flushing we don't want to flush every third IB. Signed-off-by: Bas Nieuwenhuizen Reviewed-by: Dave Airlie --- src/amd/vulkan/radv_device.c | 6 ++--- src/amd/vulkan/radv_radeon_winsys.h | 3 ++- src/amd/vulkan/radv_wsi.c | 2 +- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 26 ++++++++++++------- 4 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 222fc7ae77a..8b8e4ddfb0b 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1471,7 +1471,7 @@ VkResult radv_QueueSubmit( if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) { ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, &queue->device->empty_cs[queue->queue_family_index], - 1, NULL, + 1, NULL, NULL, (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores, pSubmits[i].waitSemaphoreCount, (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores, @@ -1509,7 +1509,7 @@ VkResult radv_QueueSubmit( *queue->device->trace_id_ptr = 0; ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, - advance, preamble_cs, + advance, preamble_cs, preamble_cs, (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores, b ? pSubmits[i].waitSemaphoreCount : 0, (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores, @@ -1541,7 +1541,7 @@ VkResult radv_QueueSubmit( if (!fence_emitted) ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, &queue->device->empty_cs[queue->queue_family_index], - 1, NULL, NULL, 0, NULL, 0, + 1, NULL, NULL, NULL, 0, NULL, 0, false, base_fence); fence->submitted = true; diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h index bdb14395d0a..8cf29a38d94 100644 --- a/src/amd/vulkan/radv_radeon_winsys.h +++ b/src/amd/vulkan/radv_radeon_winsys.h @@ -305,7 +305,8 @@ struct radeon_winsys { int queue_index, struct radeon_winsys_cs **cs_array, unsigned cs_count, - struct radeon_winsys_cs *preamble_cs, + struct radeon_winsys_cs *initial_preamble_cs, + struct radeon_winsys_cs *continue_preamble_cs, struct radeon_winsys_sem **wait_sem, unsigned wait_sem_count, struct radeon_winsys_sem **signal_sem, diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c index ea8e7849722..c8a1996b05e 100644 --- a/src/amd/vulkan/radv_wsi.c +++ b/src/amd/vulkan/radv_wsi.c @@ -367,7 +367,7 @@ VkResult radv_QueuePresentKHR( struct radeon_winsys_ctx *ctx = queue->hw_ctx; queue->device->ws->cs_submit(ctx, queue->queue_idx, &queue->device->empty_cs[queue->queue_family_index], - 1, NULL, + 1, NULL, NULL, (struct radeon_winsys_sem **)pPresentInfo->pWaitSemaphores, pPresentInfo->waitSemaphoreCount, NULL, 0, false, base_fence); fence->submitted = true; diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index 9e468bd4ca8..80100e6eabb 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -535,7 +535,8 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx, int queue_idx, struct radeon_winsys_cs **cs_array, unsigned cs_count, - struct radeon_winsys_cs *preamble_cs, + struct radeon_winsys_cs *initial_preamble_cs, + struct radeon_winsys_cs *continue_preamble_cs, struct radeon_winsys_fence *_fence) { int r; @@ -568,7 +569,7 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx, } } - r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, preamble_cs, &bo_list); + r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, initial_preamble_cs, &bo_list); if (r) { fprintf(stderr, "amdgpu: Failed to created the BO list for submission\n"); return r; @@ -580,11 +581,11 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx, request.ibs = &cs0->ib; request.resources = bo_list; - if (preamble_cs) { + if (initial_preamble_cs) { request.ibs = ibs; request.number_of_ibs = 2; ibs[1] = cs0->ib; - ibs[0] = ((struct radv_amdgpu_cs*)preamble_cs)->ib; + ibs[0] = ((struct radv_amdgpu_cs*)initial_preamble_cs)->ib; } r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1); @@ -610,7 +611,8 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx, int queue_idx, struct radeon_winsys_cs **cs_array, unsigned cs_count, - struct radeon_winsys_cs *preamble_cs, + struct radeon_winsys_cs *initial_preamble_cs, + struct radeon_winsys_cs *continue_preamble_cs, struct radeon_winsys_fence *_fence) { int r; @@ -624,6 +626,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx, for (unsigned i = 0; i < cs_count;) { struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]); struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT]; + struct radeon_winsys_cs *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs; unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT - !!preamble_cs, cs_count - i); @@ -684,7 +687,8 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, int queue_idx, struct radeon_winsys_cs **cs_array, unsigned cs_count, - struct radeon_winsys_cs *preamble_cs, + struct radeon_winsys_cs *initial_preamble_cs, + struct radeon_winsys_cs *continue_preamble_cs, struct radeon_winsys_fence *_fence) { int r; @@ -704,6 +708,7 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, for (unsigned i = 0; i < cs_count;) { struct amdgpu_cs_ib_info ib = {0}; struct radeon_winsys_bo *bo = NULL; + struct radeon_winsys_cs *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs; uint32_t *ptr; unsigned cnt = 0; unsigned size = 0; @@ -787,7 +792,8 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx, int queue_idx, struct radeon_winsys_cs **cs_array, unsigned cs_count, - struct radeon_winsys_cs *preamble_cs, + struct radeon_winsys_cs *initial_preamble_cs, + struct radeon_winsys_cs *continue_preamble_cs, struct radeon_winsys_sem **wait_sem, unsigned wait_sem_count, struct radeon_winsys_sem **signal_sem, @@ -807,13 +813,13 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx, } if (!cs->ws->use_ib_bos) { ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, cs_array, - cs_count, preamble_cs, _fence); + cs_count, initial_preamble_cs, continue_preamble_cs, _fence); } else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && false) { ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, cs_array, - cs_count, preamble_cs, _fence); + cs_count, initial_preamble_cs, continue_preamble_cs, _fence); } else { ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, cs_array, - cs_count, preamble_cs, _fence); + cs_count, initial_preamble_cs, continue_preamble_cs, _fence); } for (i = 0; i < signal_sem_count; i++) { -- 2.30.2