turnip: Update query availability on render pass end
author     Brian Ho <brian@brkho.com>
Thu, 16 Jan 2020 17:15:45 +0000 (12:15 -0500)
committer  Marge Bot <eric+marge@anholt.net>
Fri, 24 Jan 2020 18:14:01 +0000 (18:14 +0000)
Unlike an immediate-mode renderer, Turnip only renders tiles at
vkCmdEndRenderPass. As such, we need to track all queries that were
active in a given render pass and defer setting the available bit
on those queries until after all tiles have rendered.
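
As a rough illustration of the ordering problem, here is a toy model
(plain C, every name invented for this example; this is not driver
code): the per-tile draw stream is replayed once for each tile, so an
availability write recorded in it would already execute after the
first tile rather than after the last one.

   #include <stdbool.h>
   #include <stdio.h>

   #define NUM_TILES 4

   struct toy_query { bool available; };

   /* Stand-in for replaying the per-tile command stream. */
   static void render_tile(int tile, struct toy_query *q)
   {
      printf("rendering tile %d\n", tile);
      /* If the availability write lived in the per-tile stream, it
       * would execute right here, i.e. already after tile 0. */
      q->available = true;
   }

   int main(void)
   {
      struct toy_query q = { false };

      for (int tile = 0; tile < NUM_TILES; tile++) {
         render_tile(tile, &q);
         if (q.available && tile < NUM_TILES - 1)
            printf("  available after tile %d: too early\n", tile);
      }

      /* What this change does instead: one deferred write once every
       * tile has rendered. */
      q.available = true;
      printf("available after all %d tiles\n", NUM_TILES);
      return 0;
   }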

This commit adds a draw_epilogue_cs to tu_cmd_buffer that is
executed as an IB at the end of tu_CmdEndRenderPass. In
tu_CmdEndQuery, we then emit the packets that update a given
query's availability bit into this command stream.
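
A minimal sketch of that flow, again as a toy model with invented
types (the real driver uses struct tu_cs, tu_cs_emit_call() and
friends): availability writes recorded during a render pass go into a
separate epilogue stream that runs exactly once after the per-tile
streams, and both streams are discarded afterwards.

   #include <stdio.h>

   #define MAX_CMDS 16

   struct toy_stream {
      const char *cmds[MAX_CMDS];
      int count;
   };

   static void emit(struct toy_stream *s, const char *cmd)
   {
      if (s->count < MAX_CMDS)
         s->cmds[s->count++] = cmd;
   }

   static void execute(const struct toy_stream *s)
   {
      for (int i = 0; i < s->count; i++)
         printf("  %s\n", s->cmds[i]);
   }

   int main(void)
   {
      struct toy_stream draw = {0}, epilogue = {0};

      /* "vkCmdEndQuery" inside a render pass: per-tile work goes to
       * the draw stream, the availability write is deferred to the
       * epilogue stream. */
      emit(&draw, "stop counting samples for the query");
      emit(&epilogue, "CP_MEM_WRITE: query->available = 1");

      /* "vkCmdEndRenderPass": replay the draw stream once per tile,
       * then run the epilogue exactly once. */
      for (int tile = 0; tile < 4; tile++) {
         printf("tile %d:\n", tile);
         execute(&draw);
      }
      printf("epilogue:\n");
      execute(&epilogue);

      /* entries are discarded afterwards, ready for the next pass */
      draw.count = epilogue.count = 0;
      return 0;
   }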

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3279>

src/freedreno/vulkan/tu_cmd_buffer.c
src/freedreno/vulkan/tu_pass.c
src/freedreno/vulkan/tu_private.h
src/freedreno/vulkan/tu_query.c

index f5f3f8739f09e72b63876cb5b3f8051d0acc5fdd..7dc83c5356bdc8bcbbcafb777c464e56c72ecc7d 100644 (file)
@@ -1371,12 +1371,15 @@ tu6_render_tile(struct tu_cmd_buffer *cmd,
 static void
 tu6_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 {
-   VkResult result = tu_cs_reserve_space(cmd->device, cs, 16);
+   const uint32_t space = 16 + tu_cs_get_call_size(&cmd->draw_epilogue_cs);
+   VkResult result = tu_cs_reserve_space(cmd->device, cs, space);
    if (result != VK_SUCCESS) {
       cmd->record_result = result;
       return;
    }
 
+   tu_cs_emit_call(cs, &cmd->draw_epilogue_cs);
+
    tu_cs_emit_regs(cs,
                    A6XX_GRAS_LRZ_CNTL(0));
 
@@ -1652,6 +1655,7 @@ tu_create_cmd_buffer(struct tu_device *device,
    tu_bo_list_init(&cmd_buffer->bo_list);
    tu_cs_init(&cmd_buffer->cs, TU_CS_MODE_GROW, 4096);
    tu_cs_init(&cmd_buffer->draw_cs, TU_CS_MODE_GROW, 4096);
+   tu_cs_init(&cmd_buffer->draw_epilogue_cs, TU_CS_MODE_GROW, 4096);
    tu_cs_init(&cmd_buffer->sub_cs, TU_CS_MODE_SUB_STREAM, 2048);
 
    *pCommandBuffer = tu_cmd_buffer_to_handle(cmd_buffer);
@@ -1703,6 +1707,7 @@ tu_cmd_buffer_destroy(struct tu_cmd_buffer *cmd_buffer)
 
    tu_cs_finish(cmd_buffer->device, &cmd_buffer->cs);
    tu_cs_finish(cmd_buffer->device, &cmd_buffer->draw_cs);
+   tu_cs_finish(cmd_buffer->device, &cmd_buffer->draw_epilogue_cs);
    tu_cs_finish(cmd_buffer->device, &cmd_buffer->sub_cs);
 
    tu_bo_list_destroy(&cmd_buffer->bo_list);
@@ -1719,6 +1724,7 @@ tu_reset_cmd_buffer(struct tu_cmd_buffer *cmd_buffer)
    tu_bo_list_reset(&cmd_buffer->bo_list);
    tu_cs_reset(cmd_buffer->device, &cmd_buffer->cs);
    tu_cs_reset(cmd_buffer->device, &cmd_buffer->draw_cs);
+   tu_cs_reset(cmd_buffer->device, &cmd_buffer->draw_epilogue_cs);
    tu_cs_reset(cmd_buffer->device, &cmd_buffer->sub_cs);
 
    for (unsigned i = 0; i < VK_PIPELINE_BIND_POINT_RANGE_SIZE; i++) {
@@ -1834,6 +1840,7 @@ tu_BeginCommandBuffer(VkCommandBuffer commandBuffer,
 
    tu_cs_begin(&cmd_buffer->cs);
    tu_cs_begin(&cmd_buffer->draw_cs);
+   tu_cs_begin(&cmd_buffer->draw_epilogue_cs);
 
    cmd_buffer->marker_seqno = 0;
    cmd_buffer->scratch_seqno = 0;
@@ -1984,6 +1991,11 @@ tu_EndCommandBuffer(VkCommandBuffer commandBuffer)
                      MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
    }
 
+   for (uint32_t i = 0; i < cmd_buffer->draw_epilogue_cs.bo_count; i++) {
+      tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->draw_epilogue_cs.bos[i],
+                     MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
+   }
+
    for (uint32_t i = 0; i < cmd_buffer->sub_cs.bo_count; i++) {
       tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->sub_cs.bos[i],
                      MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP);
@@ -1991,6 +2003,7 @@ tu_EndCommandBuffer(VkCommandBuffer commandBuffer)
 
    tu_cs_end(&cmd_buffer->cs);
    tu_cs_end(&cmd_buffer->draw_cs);
+   tu_cs_end(&cmd_buffer->draw_epilogue_cs);
 
    cmd_buffer->status = TU_CMD_BUFFER_STATUS_EXECUTABLE;
 
@@ -2198,6 +2211,13 @@ tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
          cmd->record_result = result;
          break;
       }
+
+      result = tu_cs_add_entries(&cmd->draw_epilogue_cs,
+            &secondary->draw_epilogue_cs);
+      if (result != VK_SUCCESS) {
+         cmd->record_result = result;
+         break;
+      }
    }
    cmd->state.dirty = ~0u; /* TODO: set dirty only what needs to be */
 }
@@ -3780,12 +3800,16 @@ tu_CmdEndRenderPass(VkCommandBuffer commandBuffer)
    TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
 
    tu_cs_end(&cmd_buffer->draw_cs);
+   tu_cs_end(&cmd_buffer->draw_epilogue_cs);
 
    tu_cmd_render_tiles(cmd_buffer);
 
-   /* discard draw_cs entries now that the tiles are rendered */
+   /* discard draw_cs and draw_epilogue_cs entries now that the tiles are
+    * rendered */
    tu_cs_discard_entries(&cmd_buffer->draw_cs);
    tu_cs_begin(&cmd_buffer->draw_cs);
+   tu_cs_discard_entries(&cmd_buffer->draw_epilogue_cs);
+   tu_cs_begin(&cmd_buffer->draw_epilogue_cs);
 
    cmd_buffer->state.pass = NULL;
    cmd_buffer->state.subpass = NULL;
index 02c113bcc5954ee9b268dfca04a9b91bd410a0f5..eca129ba804dd1b3bc4db4f8d999797f8ed82c0a 100644 (file)
@@ -351,6 +351,7 @@ tu_DestroyRenderPass(VkDevice _device,
 
    if (!_pass)
       return;
+
    vk_free2(&device->alloc, pAllocator, pass->subpass_attachments);
    vk_free2(&device->alloc, pAllocator, pass);
 }
index 61998317b63e6a2b3e83dd5a7986838ba3938e10..81f71ad44874b5833d666a0097eb6191248964cf 100644 (file)
@@ -936,6 +936,7 @@ struct tu_cmd_buffer
    struct tu_bo_list bo_list;
    struct tu_cs cs;
    struct tu_cs draw_cs;
+   struct tu_cs draw_epilogue_cs;
    struct tu_cs sub_cs;
 
    uint16_t marker_reg;
index fbdd567b7d3865bdce25ce28e4fa4a6520557b24..d1abf1ed0cbbbe1ae89f3f4784d3161749524354 100644 (file)
@@ -245,8 +245,10 @@ emit_end_occlusion_query(struct tu_cmd_buffer *cmdbuf,
     *       pass, we cannot mark as available yet since the commands in
     *       draw_cs are not run until vkCmdEndRenderPass.
     */
-   struct tu_cs *cs = cmdbuf->state.pass ? &cmdbuf->draw_cs : &cmdbuf->cs;
+   const struct tu_render_pass *pass = cmdbuf->state.pass;
+   struct tu_cs *cs = pass ? &cmdbuf->draw_cs : &cmdbuf->cs;
 
+   uint64_t available_iova = occlusion_query_iova(pool, query, available);
    uint64_t begin_iova = occlusion_query_iova(pool, query, begin);
    uint64_t end_iova = occlusion_query_iova(pool, query, end);
    uint64_t result_iova = occlusion_query_iova(pool, query, result);
@@ -284,12 +286,19 @@ emit_end_occlusion_query(struct tu_cmd_buffer *cmdbuf,
 
    tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
 
-   if (!cmdbuf->state.pass) {
-      tu_cs_reserve_space(cmdbuf->device, cs, 5);
-      tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 4);
-      tu_cs_emit_qw(cs, occlusion_query_iova(pool, query, available));
-      tu_cs_emit_qw(cs, 0x1);
-   }
+   if (pass)
+      /* Technically, queries should be tracked per-subpass, but here we track
+       * at the render pass level to simplify the code a bit. This is safe
+       * because the only commands that use the available bit are
+       * vkCmdCopyQueryPoolResults and vkCmdResetQueryPool, both of which
+       * cannot be invoked from inside a render pass scope.
+       */
+      cs = &cmdbuf->draw_epilogue_cs;
+
+   tu_cs_reserve_space(cmdbuf->device, cs, 5);
+   tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 4);
+   tu_cs_emit_qw(cs, available_iova);
+   tu_cs_emit_qw(cs, 0x1);
 }
 
 void