From d8aacc24cc37dc435e250668aba0817c36996ad1 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Fri, 7 Oct 2016 12:07:31 -0700 Subject: [PATCH] anv: Enable fast depth clears Provides an FPS increase of ~30% on the Sascha triangle and multisampling demos. Signed-off-by: Nanley Chery Reviewed-by: Jason Ekstrand Reviewed-by: Chad Versace --- src/intel/vulkan/anv_pass.c | 13 +++++++++++++ src/intel/vulkan/genX_cmd_buffer.c | 24 ++++++++++++++++++++++-- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c index 69c3c7e96b7..595c2ea67d7 100644 --- a/src/intel/vulkan/anv_pass.c +++ b/src/intel/vulkan/anv_pass.c @@ -155,5 +155,18 @@ void anv_GetRenderAreaGranularity( VkRenderPass renderPass, VkExtent2D* pGranularity) { + ANV_FROM_HANDLE(anv_render_pass, pass, renderPass); + + /* This granularity satisfies HiZ fast clear alignment requirements + * for all sample counts. + */ + for (unsigned i = 0; i < pass->subpass_count; ++i) { + if (pass->subpasses[i].depth_stencil_attachment != + VK_ATTACHMENT_UNUSED) { + *pGranularity = (VkExtent2D) { .width = 8, .height = 4 }; + return; + } + } + *pGranularity = (VkExtent2D) { 1, 1 }; } diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 7dd4039b0c5..b1fa6ee5143 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -1322,8 +1322,27 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER), sb); } - /* Clear the clear params. */ - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CLEAR_PARAMS), cp); + /* From the IVB PRM Vol2P1, 11.5.5.4 3DSTATE_CLEAR_PARAMS: + * + * 3DSTATE_CLEAR_PARAMS must always be programmed in the along with + * the other Depth/Stencil state commands(i.e. 3DSTATE_DEPTH_BUFFER, + * 3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER) + * + * Testing also shows that some variant of this restriction may exist HSW+. + * On BDW+, it is not possible to emit 2 of these packets consecutively when + * both have DepthClearValueValid set. An analysis of such state programming + * on SKL showed that the GPU doesn't register the latter packet's clear + * value. + */ + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CLEAR_PARAMS), cp) { + if (has_hiz) { + cp.DepthClearValueValid = true; + const uint32_t ds = + cmd_buffer->state.subpass->depth_stencil_attachment; + cp.DepthClearValue = + cmd_buffer->state.attachments[ds].clear_value.depthStencil.depth; + } + } } static void @@ -1336,6 +1355,7 @@ genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer, cmd_buffer_emit_depth_stencil(cmd_buffer); genX(cmd_buffer_emit_hz_op)(cmd_buffer, BLORP_HIZ_OP_HIZ_RESOLVE); + genX(cmd_buffer_emit_hz_op)(cmd_buffer, BLORP_HIZ_OP_DEPTH_CLEAR); anv_cmd_buffer_clear_subpass(cmd_buffer); } -- 2.30.2