radv: Special case the initial preamble.
author Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Mon, 20 Feb 2017 08:08:31 +0000 (09:08 +0100)
committer Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tue, 21 Feb 2017 08:19:53 +0000 (09:19 +0100)
For flushing we don't want to flush every third IB.

Signed-off-by: Bas Nieuwenhuizen <basni@google.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_radeon_winsys.h
src/amd/vulkan/radv_wsi.c
src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c

index 222fc7ae77a5a6b43c33635f99f7b362f46d4ed2..8b8e4ddfb0b1c7636381c15586d16641f02eb822 100644 (file)
@@ -1471,7 +1471,7 @@ VkResult radv_QueueSubmit(
                        if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
                                ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
                                                                   &queue->device->empty_cs[queue->queue_family_index],
-                                                                  1, NULL,
+                                                                  1, NULL, NULL,
                                                                   (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
                                                                   pSubmits[i].waitSemaphoreCount,
                                                                   (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
@@ -1509,7 +1509,7 @@ VkResult radv_QueueSubmit(
                                *queue->device->trace_id_ptr = 0;
 
                        ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
-                                                       advance, preamble_cs,
+                                                       advance, preamble_cs, preamble_cs,
                                                        (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
                                                        b ? pSubmits[i].waitSemaphoreCount : 0,
                                                        (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
@@ -1541,7 +1541,7 @@ VkResult radv_QueueSubmit(
                if (!fence_emitted)
                        ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
                                                           &queue->device->empty_cs[queue->queue_family_index],
-                                                          1, NULL, NULL, 0, NULL, 0,
+                                                          1, NULL, NULL, NULL, 0, NULL, 0,
                                                           false, base_fence);
 
                fence->submitted = true;
index bdb14395d0a8434a4bc4053f53ddd599f582947f..8cf29a38d9474de0fe7c337a87e52dc20941f82c 100644 (file)
@@ -305,7 +305,8 @@ struct radeon_winsys {
                         int queue_index,
                         struct radeon_winsys_cs **cs_array,
                         unsigned cs_count,
-                        struct radeon_winsys_cs *preamble_cs,
+                        struct radeon_winsys_cs *initial_preamble_cs,
+                        struct radeon_winsys_cs *continue_preamble_cs,
                         struct radeon_winsys_sem **wait_sem,
                         unsigned wait_sem_count,
                         struct radeon_winsys_sem **signal_sem,
index ea8e7849722c6922f2a42c910e728025d56ee3a0..c8a1996b05e4de40a8dc4826390acac8683187b8 100644 (file)
@@ -367,7 +367,7 @@ VkResult radv_QueuePresentKHR(
                struct radeon_winsys_ctx *ctx = queue->hw_ctx;
                queue->device->ws->cs_submit(ctx, queue->queue_idx,
                                             &queue->device->empty_cs[queue->queue_family_index],
-                                            1, NULL,
+                                            1, NULL, NULL,
                                             (struct radeon_winsys_sem **)pPresentInfo->pWaitSemaphores,
                                             pPresentInfo->waitSemaphoreCount, NULL, 0, false, base_fence);
                fence->submitted = true;
index 9e468bd4ca8ddd6d7ab388cf1a3ca2f6d913bcae..80100e6eabb42fb809f6ff2c4d1088dce44b0c47 100644 (file)
@@ -535,7 +535,8 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
                                                int queue_idx,
                                                struct radeon_winsys_cs **cs_array,
                                                unsigned cs_count,
-                                               struct radeon_winsys_cs *preamble_cs,
+                                               struct radeon_winsys_cs *initial_preamble_cs,
+                                               struct radeon_winsys_cs *continue_preamble_cs,
                                                struct radeon_winsys_fence *_fence)
 {
        int r;
@@ -568,7 +569,7 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
                }
        }
 
-       r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, preamble_cs, &bo_list);
+       r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, initial_preamble_cs, &bo_list);
        if (r) {
                fprintf(stderr, "amdgpu: Failed to created the BO list for submission\n");
                return r;
@@ -580,11 +581,11 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
        request.ibs = &cs0->ib;
        request.resources = bo_list;
 
-       if (preamble_cs) {
+       if (initial_preamble_cs) {
                request.ibs = ibs;
                request.number_of_ibs = 2;
                ibs[1] = cs0->ib;
-               ibs[0] = ((struct radv_amdgpu_cs*)preamble_cs)->ib;
+               ibs[0] = ((struct radv_amdgpu_cs*)initial_preamble_cs)->ib;
        }
 
        r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
@@ -610,7 +611,8 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
                                                 int queue_idx,
                                                 struct radeon_winsys_cs **cs_array,
                                                 unsigned cs_count,
-                                                struct radeon_winsys_cs *preamble_cs,
+                                                struct radeon_winsys_cs *initial_preamble_cs,
+                                                struct radeon_winsys_cs *continue_preamble_cs,
                                                 struct radeon_winsys_fence *_fence)
 {
        int r;
@@ -624,6 +626,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
        for (unsigned i = 0; i < cs_count;) {
                struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]);
                struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT];
+               struct radeon_winsys_cs *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs;
                unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT - !!preamble_cs,
                                    cs_count - i);
 
@@ -684,7 +687,8 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
                                               int queue_idx,
                                               struct radeon_winsys_cs **cs_array,
                                               unsigned cs_count,
-                                              struct radeon_winsys_cs *preamble_cs,
+                                              struct radeon_winsys_cs *initial_preamble_cs,
+                                              struct radeon_winsys_cs *continue_preamble_cs,
                                               struct radeon_winsys_fence *_fence)
 {
        int r;
@@ -704,6 +708,7 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
        for (unsigned i = 0; i < cs_count;) {
                struct amdgpu_cs_ib_info ib = {0};
                struct radeon_winsys_bo *bo = NULL;
+               struct radeon_winsys_cs *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs;
                uint32_t *ptr;
                unsigned cnt = 0;
                unsigned size = 0;
@@ -787,7 +792,8 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
                                        int queue_idx,
                                        struct radeon_winsys_cs **cs_array,
                                        unsigned cs_count,
-                                       struct radeon_winsys_cs *preamble_cs,
+                                       struct radeon_winsys_cs *initial_preamble_cs,
+                                       struct radeon_winsys_cs *continue_preamble_cs,
                                        struct radeon_winsys_sem **wait_sem,
                                        unsigned wait_sem_count,
                                        struct radeon_winsys_sem **signal_sem,
@@ -807,13 +813,13 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
        }
        if (!cs->ws->use_ib_bos) {
                ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, cs_array,
-                                                          cs_count, preamble_cs, _fence);
+                                                          cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
        } else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && false) {
                ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, cs_array,
-                                                           cs_count, preamble_cs, _fence);
+                                                           cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
        } else {
                ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, cs_array,
-                                                            cs_count, preamble_cs, _fence);
+                                                            cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
        }
 
        for (i = 0; i < signal_sem_count; i++) {