turnip: Fix vkCmdCopyQueryPoolResults with available flag
authorBrian Ho <brian@brkho.com>
Fri, 24 Jan 2020 23:04:50 +0000 (18:04 -0500)
committerMarge Bot <eric+marge@anholt.net>
Thu, 30 Jan 2020 20:30:46 +0000 (20:30 +0000)
Previously, calling vkCmdCopyQueryPoolResults with the
VK_QUERY_RESULT_WITH_AVAILABILITY_BIT flag set the query result
field in the buffer to 0 if unavailable and the query result if
available. This was a misunderstanding of the Vulkan spec, and this
commit corrects the behavior to emitting a separate available
result in addition to the query result.

Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3560>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3560>

src/freedreno/vulkan/tu_query.c

index c37647154f744bcee9bd03bf8a59c610851f2c27..7931a449fb5709bfee308d264b7f3507c996a630 100644 (file)
@@ -278,6 +278,27 @@ tu_GetQueryPoolResults(VkDevice _device,
    return VK_SUCCESS;
 }
 
+/* Copies a query value from one buffer to another from the GPU. */
+static void
+copy_query_value_gpu(struct tu_cmd_buffer *cmdbuf,
+                     struct tu_cs *cs,
+                     uint64_t src_iova,
+                     uint64_t base_write_iova,
+                     uint32_t offset,
+                     VkQueryResultFlags flags) {
+   uint32_t element_size = flags & VK_QUERY_RESULT_64_BIT ?
+         sizeof(uint64_t) : sizeof(uint32_t);
+   uint64_t write_iova = base_write_iova + (offset * element_size);
+
+   tu_cs_reserve_space(cmdbuf->device, cs, 6);
+   tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 5);
+   uint32_t mem_to_mem_flags = flags & VK_QUERY_RESULT_64_BIT ?
+         CP_MEM_TO_MEM_0_DOUBLE : 0;
+   tu_cs_emit(cs, mem_to_mem_flags);
+   tu_cs_emit_qw(cs, write_iova);
+   tu_cs_emit_qw(cs, src_iova);
+}
+
 static void
 emit_copy_occlusion_query_pool_results(struct tu_cmd_buffer *cmdbuf,
                                        struct tu_cs *cs,
@@ -319,53 +340,38 @@ emit_copy_occlusion_query_pool_results(struct tu_cmd_buffer *cmdbuf,
          tu_cs_emit(cs, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16));
       }
 
-      /* If the query result is available, conditionally emit a packet to copy
-       * the result (bo->result) into the buffer.
-       *
-       * NOTE: For the conditional packet to be executed, CP_COND_EXEC tests
-       * that ADDR0 != 0 and ADDR1 < REF. The packet here simply tests that
-       * 0 < available < 2, aka available == 1.
-       */
-      tu_cs_reserve_space(cmdbuf->device, cs, 13);
-      tu_cs_emit_pkt7(cs, CP_COND_EXEC, 6);
-      tu_cs_emit_qw(cs, available_iova);
-      tu_cs_emit_qw(cs, available_iova);
-      tu_cs_emit(cs, CP_COND_EXEC_4_REF(0x2));
-      tu_cs_emit(cs, 6); /* Conditionally execute the next 6 DWORDS */
-
-      /* Start of conditional execution */
-      tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 5);
-      uint32_t mem_to_mem_flags = flags & VK_QUERY_RESULT_64_BIT ?
-            CP_MEM_TO_MEM_0_DOUBLE : 0;
-      tu_cs_emit(cs, mem_to_mem_flags);
-      tu_cs_emit_qw(cs, buffer_iova);
-      tu_cs_emit_qw(cs, result_iova);
-      /* End of conditional execution */
-
-      /* Like in the case of vkGetQueryPoolResults, copying the results of an
-       * unavailable query with the VK_QUERY_RESULT_WITH_AVAILABILITY_BIT or
-       * VK_QUERY_RESULT_PARTIAL_BIT flags will return 0. */
-      if (flags & (VK_QUERY_RESULT_WITH_AVAILABILITY_BIT |
-                   VK_QUERY_RESULT_PARTIAL_BIT)) {
-         if (flags & VK_QUERY_RESULT_64_BIT) {
-            tu_cs_reserve_space(cmdbuf->device, cs, 10);
-            tu_cs_emit_pkt7(cs, CP_COND_WRITE5, 9);
-         } else {
-            tu_cs_reserve_space(cmdbuf->device, cs, 9);
-            tu_cs_emit_pkt7(cs, CP_COND_WRITE5, 8);
-         }
-         tu_cs_emit(cs, CP_COND_WRITE5_0_FUNCTION(WRITE_EQ) |
-                        CP_COND_WRITE5_0_POLL_MEMORY |
-                        CP_COND_WRITE5_0_WRITE_MEMORY);
+      if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
+         /* Unconditionally copying the bo->result into the buffer here is
+          * valid because we only set bo->result on vkCmdEndQuery. Thus, even
+          * if the query is unavailable, this will copy the correct partial
+          * value of 0.
+          */
+         copy_query_value_gpu(cmdbuf, cs, result_iova, buffer_iova,
+                              0 /* offset */, flags);
+      } else {
+         /* Conditionally copy bo->result into the buffer based on whether the
+          * query is available.
+          *
+          * NOTE: For the conditional packets to be executed, CP_COND_EXEC
+          * tests that ADDR0 != 0 and ADDR1 < REF. The packet here simply tests
+          * that 0 < available < 2, aka available == 1.
+          */
+         tu_cs_reserve_space(cmdbuf->device, cs, 7);
+         tu_cs_emit_pkt7(cs, CP_COND_EXEC, 6);
          tu_cs_emit_qw(cs, available_iova);
-         tu_cs_emit(cs, CP_COND_WRITE5_3_REF(0));
-         tu_cs_emit(cs, CP_COND_WRITE5_4_MASK(~0));
-         tu_cs_emit_qw(cs, buffer_iova);
-         if (flags & VK_QUERY_RESULT_64_BIT) {
-            tu_cs_emit_qw(cs, 0);
-         } else {
-            tu_cs_emit(cs, 0);
-         }
+         tu_cs_emit_qw(cs, available_iova);
+         tu_cs_emit(cs, CP_COND_EXEC_4_REF(0x2));
+         tu_cs_emit(cs, 6); /* Cond execute the next 6 DWORDS */
+
+         /* Start of conditional execution */
+         copy_query_value_gpu(cmdbuf, cs, result_iova, buffer_iova,
+                              0 /* offset */, flags);
+         /* End of conditional execution */
+      }
+
+      if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
+         copy_query_value_gpu(cmdbuf, cs, available_iova, buffer_iova,
+                              1 /* offset */, flags);
       }
    }