From a07d06f10352fc5fa40db8a723fa5842ebc660db Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Wed, 1 May 2019 12:30:41 +0100 Subject: [PATCH] anv: rework queries writes to ensure ordering memory writes We use a mix of MI & PIPE_CONTROL commands to write our queries' data (results & availability). Those commands' memory write order is not guaranteed with regard to their order in the command stream, unless CS stalls are inserted between them. This is problematic for 2 reasons : 1. We copy results from the device using MI commands even though the values are generated from PIPE_CONTROL, meaning we could copy unlanded values into the results and then copy the availability that is inconsistent with the values. 2. We allow the user to poll on the availability values of the query pool from the CPU. If the availability lands in memory before the values then we could return invalid values. This change does 2 things to address this problem : - We use either PIPE_CONTROL or MI commands to write both queries values and availability, so that the ordering of the memory writes guarantees that if availability is visible, results are also visible. - For the occlusion & timestamp queries we apply a CS stall before copying the results on the device, to ensure copying with MI commands see the correct values of previous PIPE_CONTROL writes of availability (required by the Vulkan spec). Signed-off-by: Lionel Landwerlin Reported-by: Iago Toral Quiroga Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Jason Ekstrand --- src/intel/vulkan/genX_query.c | 101 ++++++++++++++++++++++++++++------ 1 file changed, 84 insertions(+), 17 deletions(-) diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c index 146435c3f8f..aa0cf8b9471 100644 --- a/src/intel/vulkan/genX_query.c +++ b/src/intel/vulkan/genX_query.c @@ -346,14 +346,23 @@ emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer, } static void -emit_query_availability(struct anv_cmd_buffer *cmd_buffer, - struct anv_address addr) +emit_query_mi_availability(struct gen_mi_builder *b, + struct anv_address addr, + bool available) +{ + gen_mi_store(b, gen_mi_mem64(addr), gen_mi_imm(available)); +} + +static void +emit_query_pc_availability(struct anv_cmd_buffer *cmd_buffer, + struct anv_address addr, + bool available) { anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { pc.DestinationAddressType = DAT_PPGTT; pc.PostSyncOperation = WriteImmediateData; pc.Address = addr; - pc.ImmediateData = 1; + pc.ImmediateData = available; } } @@ -366,11 +375,39 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer, struct gen_mi_builder *b, struct anv_query_pool *pool, uint32_t first_index, uint32_t num_queries) { - for (uint32_t i = 0; i < num_queries; i++) { - struct anv_address slot_addr = - anv_query_address(pool, first_index + i); - gen_mi_memset(b, anv_address_add(slot_addr, 8), 0, pool->stride - 8); - emit_query_availability(cmd_buffer, slot_addr); + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + case VK_QUERY_TYPE_TIMESTAMP: + /* These queries are written with a PIPE_CONTROL so clear them using the + * PIPE_CONTROL as well so we don't have to synchronize between 2 types + * of operations. + */ + assert((pool->stride % 8) == 0); + for (uint32_t i = 0; i < num_queries; i++) { + struct anv_address slot_addr = + anv_query_address(pool, first_index + i); + + for (uint32_t qword = 1; qword < (pool->stride / 8); qword++) { + emit_query_pc_availability(cmd_buffer, + anv_address_add(slot_addr, qword * 8), + false); + } + emit_query_pc_availability(cmd_buffer, slot_addr, true); + } + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + for (uint32_t i = 0; i < num_queries; i++) { + struct anv_address slot_addr = + anv_query_address(pool, first_index + i); + gen_mi_memset(b, anv_address_add(slot_addr, 8), 0, pool->stride - 8); + emit_query_mi_availability(b, slot_addr, true); + } + break; + + default: + unreachable("Unsupported query type"); } } @@ -383,11 +420,28 @@ void genX(CmdResetQueryPool)( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - for (uint32_t i = 0; i < queryCount; i++) { - anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdm) { - sdm.Address = anv_query_address(pool, firstQuery + i); - sdm.ImmediateData = 0; + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + case VK_QUERY_TYPE_TIMESTAMP: + for (uint32_t i = 0; i < queryCount; i++) { + emit_query_pc_availability(cmd_buffer, + anv_query_address(pool, firstQuery + i), + false); } + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: { + struct gen_mi_builder b; + gen_mi_builder_init(&b, &cmd_buffer->batch); + + for (uint32_t i = 0; i < queryCount; i++) + emit_query_mi_availability(&b, anv_query_address(pool, firstQuery + i), false); + break; + } + + default: + unreachable("Unsupported query type"); } } @@ -525,7 +579,7 @@ void genX(CmdEndQueryIndexedEXT)( switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: emit_ps_depth_count(cmd_buffer, anv_address_add(query_addr, 16)); - emit_query_availability(cmd_buffer, query_addr); + emit_query_pc_availability(cmd_buffer, query_addr, true); break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: { @@ -543,7 +597,7 @@ void genX(CmdEndQueryIndexedEXT)( offset += 16; } - emit_query_availability(cmd_buffer, query_addr); + emit_query_mi_availability(&b, query_addr, true); break; } @@ -554,7 +608,7 @@ void genX(CmdEndQueryIndexedEXT)( } emit_xfb_query(&b, index, anv_address_add(query_addr, 16)); - emit_query_availability(cmd_buffer, query_addr); + emit_query_mi_availability(&b, query_addr, true); break; default: @@ -613,7 +667,7 @@ void genX(CmdWriteTimestamp)( break; } - emit_query_availability(cmd_buffer, query_addr); + emit_query_pc_availability(cmd_buffer, query_addr, true); /* When multiview is active the spec requires that N consecutive query * indices are used, where N is the number of active views in the subpass. @@ -684,7 +738,20 @@ void genX(CmdCopyQueryPoolResults)( } if ((flags & VK_QUERY_RESULT_WAIT_BIT) || - (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_FLUSH_BITS)) { + (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_FLUSH_BITS) || + /* Occlusion & timestamp queries are written using a PIPE_CONTROL and + * because we're about to copy values from MI commands, we need to + * stall the command streamer to make sure the PIPE_CONTROL values have + * landed, otherwise we could see inconsistent values & availability. + * + * From the vulkan spec: + * + * "vkCmdCopyQueryPoolResults is guaranteed to see the effect of + * previous uses of vkCmdResetQueryPool in the same queue, without + * any additional synchronization." + */ + pool->type == VK_QUERY_TYPE_OCCLUSION || + pool->type == VK_QUERY_TYPE_TIMESTAMP) { cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT; genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); } -- 2.30.2