From: Brian Ho Date: Fri, 24 Jan 2020 23:04:50 +0000 (-0500) Subject: turnip: Fix vkCmdCopyQueryPoolResults with available flag X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=58fd26c4332aa3f3ffd34d99d996ba62476d5acd;p=mesa.git turnip: Fix vkCmdCopyQueryPoolResults with available flag Previously, calling vkCmdCopyQueryPoolResults with the VK_QUERY_RESULT_WITH_AVAILABILITY_BIT flag set the query result field in the buffer to 0 if unavailable and the query result if available. This was a misunderstanding of the Vulkan spec, and this commit corrects the behavior to emitting a separate available result in addition to the query result. Tested-by: Marge Bot Part-of: --- diff --git a/src/freedreno/vulkan/tu_query.c b/src/freedreno/vulkan/tu_query.c index c37647154f7..7931a449fb5 100644 --- a/src/freedreno/vulkan/tu_query.c +++ b/src/freedreno/vulkan/tu_query.c @@ -278,6 +278,27 @@ tu_GetQueryPoolResults(VkDevice _device, return VK_SUCCESS; } +/* Copies a query value from one buffer to another from the GPU. */ +static void +copy_query_value_gpu(struct tu_cmd_buffer *cmdbuf, + struct tu_cs *cs, + uint64_t src_iova, + uint64_t base_write_iova, + uint32_t offset, + VkQueryResultFlags flags) { + uint32_t element_size = flags & VK_QUERY_RESULT_64_BIT ? + sizeof(uint64_t) : sizeof(uint32_t); + uint64_t write_iova = base_write_iova + (offset * element_size); + + tu_cs_reserve_space(cmdbuf->device, cs, 6); + tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 5); + uint32_t mem_to_mem_flags = flags & VK_QUERY_RESULT_64_BIT ? + CP_MEM_TO_MEM_0_DOUBLE : 0; + tu_cs_emit(cs, mem_to_mem_flags); + tu_cs_emit_qw(cs, write_iova); + tu_cs_emit_qw(cs, src_iova); +} + static void emit_copy_occlusion_query_pool_results(struct tu_cmd_buffer *cmdbuf, struct tu_cs *cs, @@ -319,53 +340,38 @@ emit_copy_occlusion_query_pool_results(struct tu_cmd_buffer *cmdbuf, tu_cs_emit(cs, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16)); } - /* If the query result is available, conditionally emit a packet to copy - * the result (bo->result) into the buffer. - * - * NOTE: For the conditional packet to be executed, CP_COND_EXEC tests - * that ADDR0 != 0 and ADDR1 < REF. The packet here simply tests that - * 0 < available < 2, aka available == 1. - */ - tu_cs_reserve_space(cmdbuf->device, cs, 13); - tu_cs_emit_pkt7(cs, CP_COND_EXEC, 6); - tu_cs_emit_qw(cs, available_iova); - tu_cs_emit_qw(cs, available_iova); - tu_cs_emit(cs, CP_COND_EXEC_4_REF(0x2)); - tu_cs_emit(cs, 6); /* Conditionally execute the next 6 DWORDS */ - - /* Start of conditional execution */ - tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 5); - uint32_t mem_to_mem_flags = flags & VK_QUERY_RESULT_64_BIT ? - CP_MEM_TO_MEM_0_DOUBLE : 0; - tu_cs_emit(cs, mem_to_mem_flags); - tu_cs_emit_qw(cs, buffer_iova); - tu_cs_emit_qw(cs, result_iova); - /* End of conditional execution */ - - /* Like in the case of vkGetQueryPoolResults, copying the results of an - * unavailable query with the VK_QUERY_RESULT_WITH_AVAILABILITY_BIT or - * VK_QUERY_RESULT_PARTIAL_BIT flags will return 0. */ - if (flags & (VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | - VK_QUERY_RESULT_PARTIAL_BIT)) { - if (flags & VK_QUERY_RESULT_64_BIT) { - tu_cs_reserve_space(cmdbuf->device, cs, 10); - tu_cs_emit_pkt7(cs, CP_COND_WRITE5, 9); - } else { - tu_cs_reserve_space(cmdbuf->device, cs, 9); - tu_cs_emit_pkt7(cs, CP_COND_WRITE5, 8); - } - tu_cs_emit(cs, CP_COND_WRITE5_0_FUNCTION(WRITE_EQ) | - CP_COND_WRITE5_0_POLL_MEMORY | - CP_COND_WRITE5_0_WRITE_MEMORY); + if (flags & VK_QUERY_RESULT_PARTIAL_BIT) { + /* Unconditionally copying the bo->result into the buffer here is + * valid because we only set bo->result on vkCmdEndQuery. Thus, even + * if the query is unavailable, this will copy the correct partial + * value of 0. + */ + copy_query_value_gpu(cmdbuf, cs, result_iova, buffer_iova, + 0 /* offset */, flags); + } else { + /* Conditionally copy bo->result into the buffer based on whether the + * query is available. + * + * NOTE: For the conditional packets to be executed, CP_COND_EXEC + * tests that ADDR0 != 0 and ADDR1 < REF. The packet here simply tests + * that 0 < available < 2, aka available == 1. + */ + tu_cs_reserve_space(cmdbuf->device, cs, 7); + tu_cs_emit_pkt7(cs, CP_COND_EXEC, 6); tu_cs_emit_qw(cs, available_iova); - tu_cs_emit(cs, CP_COND_WRITE5_3_REF(0)); - tu_cs_emit(cs, CP_COND_WRITE5_4_MASK(~0)); - tu_cs_emit_qw(cs, buffer_iova); - if (flags & VK_QUERY_RESULT_64_BIT) { - tu_cs_emit_qw(cs, 0); - } else { - tu_cs_emit(cs, 0); - } + tu_cs_emit_qw(cs, available_iova); + tu_cs_emit(cs, CP_COND_EXEC_4_REF(0x2)); + tu_cs_emit(cs, 6); /* Cond execute the next 6 DWORDS */ + + /* Start of conditional execution */ + copy_query_value_gpu(cmdbuf, cs, result_iova, buffer_iova, + 0 /* offset */, flags); + /* End of conditional execution */ + } + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + copy_query_value_gpu(cmdbuf, cs, available_iova, buffer_iova, + 1 /* offset */, flags); } }