From 24b95485dc814c84dbf53f4bf299f9b298a566c4 Mon Sep 17 00:00:00 2001
From: Brian Ho
Date: Thu, 16 Jan 2020 12:15:45 -0500
Subject: [PATCH] turnip: Update query availability on render pass end

Unlike an immediate-mode renderer, Turnip only renders tiles on
vkCmdEndRenderPass. As such, we need to track all queries that were
active in a given render pass and defer setting the available bit on
those queries until after all tiles have been rendered.

This commit adds a draw_epilogue_cs to tu_cmd_buffer that is executed
as an IB at the end of tu_CmdEndRenderPass. We then emit packets to
this command stream that update the availability bit of a given query
in tu_CmdEndQuery.

Part-of:
---
 src/freedreno/vulkan/tu_cmd_buffer.c | 28 ++++++++++++++++++++++++++--
 src/freedreno/vulkan/tu_pass.c       |  1 +
 src/freedreno/vulkan/tu_private.h    |  1 +
 src/freedreno/vulkan/tu_query.c      | 23 ++++++++++++++++-------
 4 files changed, 44 insertions(+), 9 deletions(-)
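A note for reviewers, kept below the --- so git am ignores it: the sketch
that follows is a minimal, self-contained model of the deferral pattern the
patch implements, not turnip code. toy_cs, toy_cmd_buffer, toy_end_query,
toy_end_render_pass, MAX_PACKETS and the uint64_t "packets" are made-up
stand-ins for tu_cs/tu_cmd_buffer and the CP_MEM_WRITE packets; only the
control flow is mirrored: ending a query inside a render pass records the
"available = 1" write into an epilogue stream, and ending the render pass
replays that stream once, after all tiles.

/* Toy model of the deferral pattern; everything here is a stand-in,
 * not the real tu_cs / tu_cmd_buffer API.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_PACKETS 64

struct toy_cs {
   uint64_t packets[MAX_PACKETS];
   int count;
};

static void
toy_cs_emit(struct toy_cs *cs, uint64_t pkt)
{
   if (cs->count < MAX_PACKETS)
      cs->packets[cs->count++] = pkt;
}

struct toy_cmd_buffer {
   struct toy_cs draw_cs;          /* replayed once per tile */
   struct toy_cs draw_epilogue_cs; /* replayed once, after all tiles */
   int in_render_pass;
};

/* vkCmdEndQuery: inside a render pass, the "available = 1" write goes to
 * the epilogue stream instead of the draw stream, so it is not replayed
 * per tile and lands after the last tile. */
static void
toy_end_query(struct toy_cmd_buffer *cmd, uint64_t available_iova)
{
   struct toy_cs *cs = cmd->in_render_pass ? &cmd->draw_epilogue_cs
                                           : &cmd->draw_cs;
   toy_cs_emit(cs, available_iova);
}

/* vkCmdEndRenderPass: render every tile from draw_cs, then replay the
 * epilogue exactly once so availability is only set when all tiles are
 * done. */
static void
toy_end_render_pass(struct toy_cmd_buffer *cmd, int tile_count)
{
   for (int t = 0; t < tile_count; t++)
      printf("tile %d: replay %d draw packets\n", t, cmd->draw_cs.count);

   for (int i = 0; i < cmd->draw_epilogue_cs.count; i++)
      printf("write 1 to availability word at 0x%" PRIx64 "\n",
             cmd->draw_epilogue_cs.packets[i]);

   cmd->in_render_pass = 0;
}

int
main(void)
{
   struct toy_cmd_buffer cmd = { .in_render_pass = 1 };

   toy_end_query(&cmd, 0x1000);  /* hypothetical iova of the query's
                                    availability word */
   toy_end_render_pass(&cmd, 4); /* pretend the fb splits into 4 tiles */
   return 0;
}

In the patch itself, the "replay once" step corresponds to the
tu_cs_emit_call(cs, &cmd->draw_epilogue_cs) added to tu6_render_end(),
which runs after the per-tile rendering.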
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index f5f3f8739f0..7dc83c5356b 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -1371,12 +1371,15 @@ tu6_render_tile(struct tu_cmd_buffer *cmd,
 static void
 tu6_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 {
-   VkResult result = tu_cs_reserve_space(cmd->device, cs, 16);
+   const uint32_t space = 16 + tu_cs_get_call_size(&cmd->draw_epilogue_cs);
+   VkResult result = tu_cs_reserve_space(cmd->device, cs, space);
    if (result != VK_SUCCESS) {
       cmd->record_result = result;
       return;
    }
 
+   tu_cs_emit_call(cs, &cmd->draw_epilogue_cs);
+
    tu_cs_emit_regs(cs,
                    A6XX_GRAS_LRZ_CNTL(0));
 
@@ -1652,6 +1655,7 @@ tu_create_cmd_buffer(struct tu_device *device,
    tu_bo_list_init(&cmd_buffer->bo_list);
    tu_cs_init(&cmd_buffer->cs, TU_CS_MODE_GROW, 4096);
    tu_cs_init(&cmd_buffer->draw_cs, TU_CS_MODE_GROW, 4096);
+   tu_cs_init(&cmd_buffer->draw_epilogue_cs, TU_CS_MODE_GROW, 4096);
    tu_cs_init(&cmd_buffer->sub_cs, TU_CS_MODE_SUB_STREAM, 2048);
 
    *pCommandBuffer = tu_cmd_buffer_to_handle(cmd_buffer);
@@ -1703,6 +1707,7 @@ tu_cmd_buffer_destroy(struct tu_cmd_buffer *cmd_buffer)
 
    tu_cs_finish(cmd_buffer->device, &cmd_buffer->cs);
    tu_cs_finish(cmd_buffer->device, &cmd_buffer->draw_cs);
+   tu_cs_finish(cmd_buffer->device, &cmd_buffer->draw_epilogue_cs);
    tu_cs_finish(cmd_buffer->device, &cmd_buffer->sub_cs);
 
    tu_bo_list_destroy(&cmd_buffer->bo_list);
@@ -1719,6 +1724,7 @@ tu_reset_cmd_buffer(struct tu_cmd_buffer *cmd_buffer)
    tu_bo_list_reset(&cmd_buffer->bo_list);
    tu_cs_reset(cmd_buffer->device, &cmd_buffer->cs);
    tu_cs_reset(cmd_buffer->device, &cmd_buffer->draw_cs);
+   tu_cs_reset(cmd_buffer->device, &cmd_buffer->draw_epilogue_cs);
    tu_cs_reset(cmd_buffer->device, &cmd_buffer->sub_cs);
 
    for (unsigned i = 0; i < VK_PIPELINE_BIND_POINT_RANGE_SIZE; i++) {
@@ -1834,6 +1840,7 @@ tu_BeginCommandBuffer(VkCommandBuffer commandBuffer,
 
    tu_cs_begin(&cmd_buffer->cs);
    tu_cs_begin(&cmd_buffer->draw_cs);
+   tu_cs_begin(&cmd_buffer->draw_epilogue_cs);
 
    cmd_buffer->marker_seqno = 0;
    cmd_buffer->scratch_seqno = 0;
@@ -1984,6 +1991,11 @@ tu_EndCommandBuffer(VkCommandBuffer commandBuffer)
                      MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
    }
 
+   for (uint32_t i = 0; i < cmd_buffer->draw_epilogue_cs.bo_count; i++) {
+      tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->draw_epilogue_cs.bos[i],
+                     MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
+   }
+
    for (uint32_t i = 0; i < cmd_buffer->sub_cs.bo_count; i++) {
       tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->sub_cs.bos[i],
                      MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
@@ -1991,6 +2003,7 @@ tu_EndCommandBuffer(VkCommandBuffer commandBuffer)
 
    tu_cs_end(&cmd_buffer->cs);
    tu_cs_end(&cmd_buffer->draw_cs);
+   tu_cs_end(&cmd_buffer->draw_epilogue_cs);
 
    cmd_buffer->status = TU_CMD_BUFFER_STATUS_EXECUTABLE;
 
@@ -2198,6 +2211,13 @@ tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
          cmd->record_result = result;
         break;
       }
+
+      result = tu_cs_add_entries(&cmd->draw_epilogue_cs,
+                                 &secondary->draw_epilogue_cs);
+      if (result != VK_SUCCESS) {
+         cmd->record_result = result;
+         break;
+      }
    }
    cmd->state.dirty = ~0u; /* TODO: set dirty only what needs to be */
 }
@@ -3780,12 +3800,16 @@ tu_CmdEndRenderPass(VkCommandBuffer commandBuffer)
    TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
 
    tu_cs_end(&cmd_buffer->draw_cs);
+   tu_cs_end(&cmd_buffer->draw_epilogue_cs);
 
    tu_cmd_render_tiles(cmd_buffer);
 
-   /* discard draw_cs entries now that the tiles are rendered */
+   /* discard draw_cs and draw_epilogue_cs entries now that the tiles are
+      rendered */
    tu_cs_discard_entries(&cmd_buffer->draw_cs);
    tu_cs_begin(&cmd_buffer->draw_cs);
+   tu_cs_discard_entries(&cmd_buffer->draw_epilogue_cs);
+   tu_cs_begin(&cmd_buffer->draw_epilogue_cs);
 
    cmd_buffer->state.pass = NULL;
    cmd_buffer->state.subpass = NULL;
diff --git a/src/freedreno/vulkan/tu_pass.c b/src/freedreno/vulkan/tu_pass.c
index 02c113bcc59..eca129ba804 100644
--- a/src/freedreno/vulkan/tu_pass.c
+++ b/src/freedreno/vulkan/tu_pass.c
@@ -351,6 +351,7 @@ tu_DestroyRenderPass(VkDevice _device,
 
    if (!_pass)
       return;
+   vk_free2(&device->alloc, pAllocator, pass->subpass_attachments);
    vk_free2(&device->alloc, pAllocator, pass);
 }
 
diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h
index 61998317b63..81f71ad4487 100644
--- a/src/freedreno/vulkan/tu_private.h
+++ b/src/freedreno/vulkan/tu_private.h
@@ -936,6 +936,7 @@ struct tu_cmd_buffer
    struct tu_bo_list bo_list;
    struct tu_cs cs;
    struct tu_cs draw_cs;
+   struct tu_cs draw_epilogue_cs;
    struct tu_cs sub_cs;
 
    uint16_t marker_reg;
diff --git a/src/freedreno/vulkan/tu_query.c b/src/freedreno/vulkan/tu_query.c
index fbdd567b7d3..d1abf1ed0cb 100644
--- a/src/freedreno/vulkan/tu_query.c
+++ b/src/freedreno/vulkan/tu_query.c
@@ -245,8 +245,10 @@ emit_end_occlusion_query(struct tu_cmd_buffer *cmdbuf,
     * pass, we cannot mark as available yet since the commands in
     * draw_cs are not run until vkCmdEndRenderPass.
     */
-   struct tu_cs *cs = cmdbuf->state.pass ? &cmdbuf->draw_cs : &cmdbuf->cs;
+   const struct tu_render_pass *pass = cmdbuf->state.pass;
+   struct tu_cs *cs = pass ? &cmdbuf->draw_cs : &cmdbuf->cs;
 
+   uint64_t available_iova = occlusion_query_iova(pool, query, available);
    uint64_t begin_iova = occlusion_query_iova(pool, query, begin);
    uint64_t end_iova = occlusion_query_iova(pool, query, end);
    uint64_t result_iova = occlusion_query_iova(pool, query, result);
@@ -284,12 +286,19 @@ emit_end_occlusion_query(struct tu_cmd_buffer *cmdbuf,
 
    tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
 
-   if (!cmdbuf->state.pass) {
-      tu_cs_reserve_space(cmdbuf->device, cs, 5);
-      tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 4);
-      tu_cs_emit_qw(cs, occlusion_query_iova(pool, query, available));
-      tu_cs_emit_qw(cs, 0x1);
-   }
+   if (pass)
+      /* Technically, queries should be tracked per-subpass, but here we track
+       * at the render pass level to simplify the code a bit. This is safe
+       * because the only commands that use the available bit are
+       * vkCmdCopyQueryPoolResults and vkCmdResetQueryPool, both of which
+       * cannot be invoked from inside a render pass scope.
+       */
+      cs = &cmdbuf->draw_epilogue_cs;
+
+   tu_cs_reserve_space(cmdbuf->device, cs, 5);
+   tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 4);
+   tu_cs_emit_qw(cs, available_iova);
+   tu_cs_emit_qw(cs, 0x1);
 }
 
 void
-- 
2.30.2