From ac0f8a6ea050fcb00b26a64cf717a7329ae23337 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 14 Sep 2018 15:10:28 -0500 Subject: [PATCH] anv: Implement transform feedback queries Reviewed-by: Lionel Landwerlin --- src/intel/vulkan/anv_device.c | 2 +- src/intel/vulkan/genX_pipeline.c | 1 + src/intel/vulkan/genX_query.c | 72 +++++++++++++++++++++++++++++++- 3 files changed, 73 insertions(+), 2 deletions(-) diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 97a5ce305d7..758de3d96d2 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1305,7 +1305,7 @@ void anv_GetPhysicalDeviceProperties2( props->maxTransformFeedbackStreamDataSize = 128 * 4; props->maxTransformFeedbackBufferDataSize = 128 * 4; props->maxTransformFeedbackBufferDataStride = 2048; - props->transformFeedbackQueries = VK_FALSE; + props->transformFeedbackQueries = VK_TRUE; props->transformFeedbackStreamsLinesTriangles = VK_FALSE; props->transformFeedbackRasterizationStreamSelect = VK_FALSE; props->transformFeedbackDraw = VK_TRUE; diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 899a96fd84f..d2142ae42c2 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -1150,6 +1150,7 @@ emit_3dstate_streamout(struct anv_pipeline *pipeline, #if GEN_GEN >= 8 if (xfb_info) { so.SOFunctionEnable = true; + so.SOStatisticsEnable = true; const VkPipelineRasterizationStateStreamCreateInfoEXT *stream_info = vk_find_struct_const(rs_info, PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT); diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c index c3ecd5b665b..794d92dc6c9 100644 --- a/src/intel/vulkan/genX_query.c +++ b/src/intel/vulkan/genX_query.c @@ -72,6 +72,12 @@ VkResult genX(CreateQueryPool)( /* Statistics queries have a min and max for every statistic */ uint64s_per_slot += 2 * util_bitcount(pipeline_statistics); break; + case 
VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + /* Transform feedback queries are 4 values, begin/end for + * written/needed. + */ + uint64s_per_slot += 4; + break; default: assert(!"Invalid query type"); } @@ -220,7 +226,8 @@ VkResult genX(GetQueryPoolResults)( assert(pool->type == VK_QUERY_TYPE_OCCLUSION || pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS || - pool->type == VK_QUERY_TYPE_TIMESTAMP); + pool->type == VK_QUERY_TYPE_TIMESTAMP || + pool->type == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT); if (anv_device_is_lost(device)) return VK_ERROR_DEVICE_LOST; @@ -284,6 +291,15 @@ VkResult genX(GetQueryPoolResults)( break; } + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + if (write_results) + cpu_write_query_result(pData, flags, idx, slot[2] - slot[1]); + idx++; + if (write_results) + cpu_write_query_result(pData, flags, idx, slot[4] - slot[3]); + idx++; + break; + case VK_QUERY_TYPE_TIMESTAMP: if (write_results) cpu_write_query_result(pData, flags, idx, slot[1]); @@ -411,6 +427,31 @@ emit_pipeline_stat(struct anv_cmd_buffer *cmd_buffer, uint32_t stat, emit_srm64(&cmd_buffer->batch, addr, vk_pipeline_stat_to_reg[stat]); } +static void +emit_xfb_query(struct anv_cmd_buffer *cmd_buffer, uint32_t stream, + struct anv_address addr) +{ + assert(stream < MAX_XFB_STREAMS); + + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), lrm) { + lrm.RegisterAddress = GENX(SO_NUM_PRIMS_WRITTEN0_num) + 0 + stream * 8; + lrm.MemoryAddress = anv_address_add(addr, 0); + } + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), lrm) { + lrm.RegisterAddress = GENX(SO_NUM_PRIMS_WRITTEN0_num) + 4 + stream * 8; + lrm.MemoryAddress = anv_address_add(addr, 4); + } + + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), lrm) { + lrm.RegisterAddress = GENX(SO_PRIM_STORAGE_NEEDED0_num) + 0 + stream * 8; + lrm.MemoryAddress = anv_address_add(addr, 16); + } + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), lrm) { + lrm.RegisterAddress = 
GENX(SO_PRIM_STORAGE_NEEDED0_num) + 4 + stream * 8; + lrm.MemoryAddress = anv_address_add(addr, 20); + } +} + void genX(CmdBeginQuery)( VkCommandBuffer commandBuffer, VkQueryPool queryPool, @@ -454,6 +495,14 @@ void genX(CmdBeginQueryIndexedEXT)( break; } + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { + pc.CommandStreamerStallEnable = true; + pc.StallAtPixelScoreboard = true; + } + emit_xfb_query(cmd_buffer, index, anv_address_add(query_addr, 8)); + break; + default: unreachable(""); } @@ -503,6 +552,16 @@ void genX(CmdEndQueryIndexedEXT)( break; } + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { + pc.CommandStreamerStallEnable = true; + pc.StallAtPixelScoreboard = true; + } + + emit_xfb_query(cmd_buffer, index, anv_address_add(query_addr, 16)); + emit_query_availability(cmd_buffer, query_addr); + break; + default: unreachable(""); } @@ -797,6 +856,17 @@ void genX(CmdCopyQueryPoolResults)( break; } + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + compute_query_result(&cmd_buffer->batch, MI_ALU_REG2, + anv_address_add(query_addr, 8)); + gpu_write_query_result(&cmd_buffer->batch, dest_addr, + flags, idx++, CS_GPR(2)); + compute_query_result(&cmd_buffer->batch, MI_ALU_REG2, + anv_address_add(query_addr, 24)); + gpu_write_query_result(&cmd_buffer->batch, dest_addr, + flags, idx++, CS_GPR(2)); + break; + case VK_QUERY_TYPE_TIMESTAMP: emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(2), anv_address_add(query_addr, 8)); -- 2.30.2