From: Jason Ekstrand Date: Wed, 16 Nov 2016 18:39:15 +0000 (-0800) Subject: anv/pipeline: Handle depth/stencil self-dependencies X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=140d041fac24b6140f8df8f02418b8bc1503d4a8;p=mesa.git anv/pipeline: Handle depth/stencil self-dependencies Reviewed-by: Jordan Justen --- diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c index 1b40ef14670..c1c149b48b5 100644 --- a/src/intel/vulkan/anv_pass.c +++ b/src/intel/vulkan/anv_pass.c @@ -116,6 +116,10 @@ VkResult anv_CreateRenderPass( subpass->input_attachments[j] = a; pass->attachments[a].usage |= VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; pass->attachments[a].subpass_usage[i] |= ANV_SUBPASS_USAGE_INPUT; + + if (desc->pDepthStencilAttachment && + a == desc->pDepthStencilAttachment->attachment) + subpass->has_ds_self_dep = true; } } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index ba2e85a535f..4fa4f32bcbe 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1711,6 +1711,9 @@ struct anv_subpass { uint32_t * resolve_attachments; uint32_t depth_stencil_attachment; + /** Subpass has a depth/stencil self-dependency */ + bool has_ds_self_dep; + /** Subpass has at least one resolve attachment */ bool has_resolve; }; diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index a49267dc0d9..9d985752f8c 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -1039,7 +1039,7 @@ emit_3dstate_gs(struct anv_pipeline *pipeline) } static void -emit_3dstate_wm(struct anv_pipeline *pipeline, +emit_3dstate_wm(struct anv_pipeline *pipeline, struct anv_subpass *subpass, const VkPipelineMultisampleStateCreateInfo *multisample) { const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); @@ -1069,12 +1069,21 @@ emit_3dstate_wm(struct anv_pipeline *pipeline, /* FIXME: This needs a lot more work, cf gen7 upload_wm_state(). */ wm.ThreadDispatchEnable = true; - wm.PixelShaderKillsPixel = wm_prog_data->uses_kill; wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode; wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth; wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w; wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask; + /* If the subpass has a depth or stencil self-dependency, then we + * need to force the hardware to do the depth/stencil write *after* + * fragment shader execution. Otherwise, the writes may hit memory + * before we get around to fetching from the input attachment and we + * may get the depth or stencil value from the current draw rather + * than the previous one. + */ + wm.PixelShaderKillsPixel = subpass->has_ds_self_dep || + wm_prog_data->uses_kill; + if (samples > 1) { wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN; if (wm_prog_data->persample_dispatch) { @@ -1163,7 +1172,8 @@ emit_3dstate_ps(struct anv_pipeline *pipeline) #if GEN_GEN >= 8 static void -emit_3dstate_ps_extra(struct anv_pipeline *pipeline) +emit_3dstate_ps_extra(struct anv_pipeline *pipeline, + struct anv_subpass *subpass) { const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); @@ -1177,11 +1187,19 @@ emit_3dstate_ps_extra(struct anv_pipeline *pipeline) ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0; ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask; ps.PixelShaderIsPerSample = wm_prog_data->persample_dispatch; - ps.PixelShaderKillsPixel = wm_prog_data->uses_kill; ps.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode; ps.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth; ps.PixelShaderUsesSourceW = wm_prog_data->uses_src_w; + /* If the subpass has a depth or stencil self-dependency, then we need + * to force the hardware to do the depth/stencil write *after* fragment + * shader execution. Otherwise, the writes may hit memory before we get + * around to fetching from the input attachment and we may get the depth + * or stencil value from the current draw rather than the previous one. + */ + ps.PixelShaderKillsPixel = subpass->has_ds_self_dep || + wm_prog_data->uses_kill; + #if GEN_GEN >= 9 ps.PixelShaderPullsBary = wm_prog_data->pulls_bary; ps.InputCoverageMaskState = wm_prog_data->uses_sample_mask ? @@ -1267,10 +1285,10 @@ genX(graphics_pipeline_create)( emit_3dstate_vs(pipeline); emit_3dstate_gs(pipeline); emit_3dstate_sbe(pipeline); - emit_3dstate_wm(pipeline, pCreateInfo->pMultisampleState); + emit_3dstate_wm(pipeline, subpass, pCreateInfo->pMultisampleState); emit_3dstate_ps(pipeline); #if GEN_GEN >= 8 - emit_3dstate_ps_extra(pipeline); + emit_3dstate_ps_extra(pipeline, subpass); emit_3dstate_vf_topology(pipeline); #endif