From 8b47b97215af7157bc15676167cab73aa5a61a76 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 10 Feb 2017 00:20:44 +0000 Subject: [PATCH] radv: detect command buffers that do no work and drop them (v2) If a command buffer is just full of flushes, don't bother submitting it, since we flush things on command buffer submission anyway. This will reduce some CPU overhead on dota2, which submits a fair few command streams that don't end up drawing anything. v2: reorganise loop to count first then malloc, rename some vars (Bas) Reviewed-by: Bas Nieuwenhuizen Signed-off-by: Dave Airlie --- src/amd/vulkan/radv_cmd_buffer.c | 3 +++ src/amd/vulkan/radv_device.c | 27 ++++++++++++++++++++------- src/amd/vulkan/radv_meta_buffer.c | 1 + src/amd/vulkan/radv_private.h | 2 ++ src/amd/vulkan/si_cmd_buffer.c | 2 +- 5 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index f281f33dc73..25b1bd6a3e8 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1277,6 +1277,7 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer) MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4096); + cmd_buffer->no_draws = false; if ((cmd_buffer->state.vertex_descriptors_dirty || cmd_buffer->state.vb_dirty) && cmd_buffer->state.pipeline->num_vertex_attribs) { unsigned vb_offset; @@ -1592,6 +1593,7 @@ static void radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) cmd_buffer->record_fail = false; cmd_buffer->ring_offsets_idx = -1; + cmd_buffer->no_draws = true; } VkResult radv_ResetCommandBuffer( @@ -2423,6 +2425,7 @@ void radv_CmdDrawIndexedIndirectCountAMD( static void radv_flush_compute_state(struct radv_cmd_buffer *cmd_buffer) { + cmd_buffer->no_draws = false; radv_emit_compute_pipeline(cmd_buffer); radv_flush_descriptors(cmd_buffer, cmd_buffer->state.compute_pipeline, VK_SHADER_STAGE_COMPUTE_BIT); diff --git a/src/amd/vulkan/radv_device.c 
b/src/amd/vulkan/radv_device.c index fff31259028..9be09af1795 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1452,8 +1452,18 @@ VkResult radv_QueueSubmit( struct radeon_winsys_cs **cs_array; bool can_patch = true; uint32_t advance; + int draw_cmd_buffers_count = 0; - if (!pSubmits[i].commandBufferCount) { + for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) { + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, + pSubmits[i].pCommandBuffers[j]); + assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); + if (cmd_buffer->no_draws == true) + continue; + draw_cmd_buffers_count++; + } + + if (!draw_cmd_buffers_count) { if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) { ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, &queue->device->empty_cs[queue->queue_family_index], @@ -1472,24 +1482,27 @@ VkResult radv_QueueSubmit( continue; } - cs_array = malloc(sizeof(struct radeon_winsys_cs *) * - pSubmits[i].commandBufferCount); + cs_array = malloc(sizeof(struct radeon_winsys_cs *) * draw_cmd_buffers_count); + int draw_cmd_buffer_idx = 0; for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pSubmits[i].pCommandBuffers[j]); assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); + if (cmd_buffer->no_draws == true) + continue; - cs_array[j] = cmd_buffer->cs; + cs_array[draw_cmd_buffer_idx] = cmd_buffer->cs; + draw_cmd_buffer_idx++; if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) can_patch = false; } - for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) { + for (uint32_t j = 0; j < draw_cmd_buffers_count; j += advance) { advance = MIN2(max_cs_submission, - pSubmits[i].commandBufferCount - j); + draw_cmd_buffers_count - j); bool b = j == 0; - bool e = j + advance == pSubmits[i].commandBufferCount; + bool e = j + advance == draw_cmd_buffers_count; if (queue->device->trace_bo) 
*queue->device->trace_id_ptr = 0; diff --git a/src/amd/vulkan/radv_meta_buffer.c b/src/amd/vulkan/radv_meta_buffer.c index cd2973fa4a9..4857d3dc54f 100644 --- a/src/amd/vulkan/radv_meta_buffer.c +++ b/src/amd/vulkan/radv_meta_buffer.c @@ -523,6 +523,7 @@ void radv_CmdUpdateBuffer( assert(!(dataSize & 3)); assert(!(va & 3)); + cmd_buffer->no_draws = false; if (dataSize < 4096) { cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, dst_buffer->bo, 8); diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 25ed5dec7cf..9a88ce0d64d 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -750,6 +750,8 @@ struct radv_cmd_buffer { uint32_t gsvs_ring_size_needed; int ring_offsets_idx; /* just used for verification */ + + bool no_draws; }; struct radv_image; diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index 1c99b2282c6..b94c1f10797 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -828,7 +828,7 @@ static void si_emit_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count, uint64_t remaining_size, unsigned *flags) { - + cmd_buffer->no_draws = false; /* Flush the caches for the first copy only. * Also wait for the previous CP DMA operations. */ -- 2.30.2