From d115b67712d6db1eff9d3a4bb57a585c0158be74 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Fri, 27 Jan 2017 00:19:52 +0100 Subject: [PATCH] radv/amdgpu: Support a preamble CS. Signed-off-by: Bas Nieuwenhuizen Reviewed-by: Dave Airlie --- src/amd/vulkan/radv_device.c | 6 +- src/amd/vulkan/radv_radeon_winsys.h | 1 + src/amd/vulkan/radv_wsi.c | 2 +- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 62 +++++++++++++++---- 4 files changed, 56 insertions(+), 15 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 1505498c4bf..ad83f9f4eb1 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1021,7 +1021,8 @@ VkResult radv_QueueSubmit( if (queue->device->trace_bo) *queue->device->trace_id_ptr = 0; - ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, advance, + ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, + advance, NULL, (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores, b ? pSubmits[i].waitSemaphoreCount : 0, (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores, @@ -1052,7 +1053,8 @@ VkResult radv_QueueSubmit( if (!submitCount) ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, &queue->device->empty_cs[queue->queue_family_index], - 1, NULL, 0, NULL, 0, false, base_fence); + 1, NULL, NULL, 0, NULL, 0, + false, base_fence); fence->submitted = true; } diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h index a0b5092e300..bdb14395d0a 100644 --- a/src/amd/vulkan/radv_radeon_winsys.h +++ b/src/amd/vulkan/radv_radeon_winsys.h @@ -305,6 +305,7 @@ struct radeon_winsys { int queue_index, struct radeon_winsys_cs **cs_array, unsigned cs_count, + struct radeon_winsys_cs *preamble_cs, struct radeon_winsys_sem **wait_sem, unsigned wait_sem_count, struct radeon_winsys_sem **signal_sem, diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c index 2f45961cf8c..9c9e1bb0a8d 100644 --- a/src/amd/vulkan/radv_wsi.c +++ b/src/amd/vulkan/radv_wsi.c @@ -367,7 +367,7 @@ VkResult radv_QueuePresentKHR( struct radeon_winsys_ctx *ctx = queue->hw_ctx; queue->device->ws->cs_submit(ctx, queue->queue_idx, &queue->device->empty_cs[queue->queue_family_index], - 1, + 1, NULL, (struct radeon_winsys_sem **)pPresentInfo->pWaitSemaphores, pPresentInfo->waitSemaphoreCount, NULL, 0, false, base_fence); fence->submitted = true; diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index f7707f6c793..b58f5db0622 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -422,6 +422,7 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, struct radeon_winsys_cs **cs_array, unsigned count, struct radv_amdgpu_winsys_bo *extra_bo, + struct radeon_winsys_cs *extra_cs, amdgpu_bo_list_handle *bo_list) { int r; @@ -448,7 +449,7 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, bo_list); free(handles); pthread_mutex_unlock(&ws->global_bo_list_lock); - } else if (count == 1 && !extra_bo) { + } else if (count == 1 && !extra_bo && !extra_cs) { struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0]; r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles, cs->priorities, bo_list); @@ -460,6 +461,10 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, total_buffer_count += cs->num_buffers; } + if (extra_cs) { + total_buffer_count += ((struct radv_amdgpu_cs*)extra_cs)->num_buffers; + } + amdgpu_bo_handle *handles = malloc(sizeof(amdgpu_bo_handle) * total_buffer_count); uint8_t *priorities = malloc(sizeof(uint8_t) * total_buffer_count); if (!handles || !priorities) { @@ -473,8 +478,14 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, priorities[0] = 8; } - for (unsigned i = 0; i < count; ++i) { - struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i]; + for (unsigned i = 0; i < count + !!extra_cs; ++i) { + struct radv_amdgpu_cs *cs; + + if (i == count) + cs = (struct radv_amdgpu_cs*)extra_cs; + else + cs = (struct radv_amdgpu_cs*)cs_array[i]; + for (unsigned j = 0; j < cs->num_buffers; ++j) { bool found = false; for (unsigned k = 0; k < unique_bo_count; ++k) { @@ -514,6 +525,7 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx, int queue_idx, struct radeon_winsys_cs **cs_array, unsigned cs_count, + struct radeon_winsys_cs *preamble_cs, struct radeon_winsys_fence *_fence) { int r; @@ -522,6 +534,7 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx, struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]); amdgpu_bo_list_handle bo_list; struct amdgpu_cs_request request = {0}; + struct amdgpu_cs_ib_info ibs[2]; for (unsigned i = cs_count; i--;) { struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]); @@ -545,7 +558,7 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx, } } - r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, &bo_list); + r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, preamble_cs, &bo_list); if (r) { fprintf(stderr, "amdgpu: Failed to created the BO list for submission\n"); return r; @@ -557,6 +570,13 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx, request.ibs = &cs0->ib; request.resources = bo_list; + if (preamble_cs) { + request.ibs = ibs; + request.number_of_ibs = 2; + ibs[1] = cs0->ib; + ibs[0] = ((struct radv_amdgpu_cs*)preamble_cs)->ib; + } + r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1); if (r) { if (r == -ENOMEM) @@ -580,6 +600,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx, int queue_idx, struct radeon_winsys_cs **cs_array, unsigned cs_count, + struct radeon_winsys_cs *preamble_cs, struct radeon_winsys_fence *_fence) { int r; @@ -593,11 +614,13 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx, for (unsigned i = 0; i < cs_count;) { struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]); struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT]; - unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT, cs_count - i); + unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT - !!preamble_cs, + cs_count - i); memset(&request, 0, sizeof(request)); - r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, &bo_list); + r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, + preamble_cs, &bo_list); if (r) { fprintf(stderr, "amdgpu: Failed to created the BO list for submission\n"); return r; @@ -606,12 +629,16 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx, request.ip_type = cs0->hw_ip; request.ring = queue_idx; request.resources = bo_list; - request.number_of_ibs = cnt; + request.number_of_ibs = cnt + !!preamble_cs; request.ibs = ibs; + if (preamble_cs) { + ibs[0] = radv_amdgpu_cs(preamble_cs)->ib; + } + for (unsigned j = 0; j < cnt; ++j) { struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]); - ibs[j] = cs->ib; + ibs[j + !!preamble_cs] = cs->ib; if (cs->is_chained) { *cs->ib_size_ptr -= 4; @@ -647,6 +674,7 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, int queue_idx, struct radeon_winsys_cs **cs_array, unsigned cs_count, + struct radeon_winsys_cs *preamble_cs, struct radeon_winsys_fence *_fence) { int r; @@ -670,6 +698,9 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, unsigned cnt = 0; unsigned size = 0; + if (preamble_cs) + size += preamble_cs->cdw; + while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) { size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw; ++cnt; @@ -680,6 +711,11 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, bo = ws->buffer_create(ws, 4 * size, 4096, RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS); ptr = ws->buffer_map(bo); + if (preamble_cs) { + memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4); + ptr += preamble_cs->cdw; + } + for (unsigned j = 0; j < cnt; ++j) { struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]); memcpy(ptr, cs->base.buf, 4 * cs->base.cdw); @@ -696,7 +732,8 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, - (struct radv_amdgpu_winsys_bo*)bo, &bo_list); + (struct radv_amdgpu_winsys_bo*)bo, + preamble_cs, &bo_list); if (r) { fprintf(stderr, "amdgpu: Failed to created the BO list for submission\n"); return r; @@ -740,6 +777,7 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx, int queue_idx, struct radeon_winsys_cs **cs_array, unsigned cs_count, + struct radeon_winsys_cs *preamble_cs, struct radeon_winsys_sem **wait_sem, unsigned wait_sem_count, struct radeon_winsys_sem **signal_sem, @@ -759,13 +797,13 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx, } if (!cs->ws->use_ib_bos) { ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, cs_array, - cs_count, _fence); + cs_count, preamble_cs, _fence); } else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && false) { ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, cs_array, - cs_count, _fence); + cs_count, preamble_cs, _fence); } else { ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, cs_array, - cs_count, _fence); + cs_count, preamble_cs, _fence); } for (i = 0; i < signal_sem_count; i++) { -- 2.30.2