anv/pipeline: Handle depth/stencil self-dependencies
author Jason Ekstrand <jason.ekstrand@intel.com>
Wed, 16 Nov 2016 18:39:15 +0000 (10:39 -0800)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Tue, 22 Nov 2016 21:44:55 +0000 (13:44 -0800)
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
src/intel/vulkan/anv_pass.c
src/intel/vulkan/anv_private.h
src/intel/vulkan/genX_pipeline.c

index 1b40ef14670e2b5a97041a327eaf86d8dedc4a23..c1c149b48b5e419ee4056dac9a67348e4fee5126 100644 (file)
@@ -116,6 +116,10 @@ VkResult anv_CreateRenderPass(
             subpass->input_attachments[j] = a;
             pass->attachments[a].usage |= VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
             pass->attachments[a].subpass_usage[i] |= ANV_SUBPASS_USAGE_INPUT;
+
+            if (desc->pDepthStencilAttachment &&
+                a == desc->pDepthStencilAttachment->attachment)
+               subpass->has_ds_self_dep = true;
          }
       }
 
index ba2e85a535f7a9f77c5fe50be29e04dfb510cb3a..4fa4f32bcbeb9c21000c7588ae21e4eec88c5518 100644 (file)
@@ -1711,6 +1711,9 @@ struct anv_subpass {
    uint32_t *                                   resolve_attachments;
    uint32_t                                     depth_stencil_attachment;
 
+   /** Subpass has a depth/stencil self-dependency */
+   bool                                         has_ds_self_dep;
+
    /** Subpass has at least one resolve attachment */
    bool                                         has_resolve;
 };
index a49267dc0d9f32f2412262bc65b4d70d14a452f2..9d985752f8cab9d533cc43684392ae157befeb5f 100644 (file)
@@ -1039,7 +1039,7 @@ emit_3dstate_gs(struct anv_pipeline *pipeline)
 }
 
 static void
-emit_3dstate_wm(struct anv_pipeline *pipeline,
+emit_3dstate_wm(struct anv_pipeline *pipeline, struct anv_subpass *subpass,
                 const VkPipelineMultisampleStateCreateInfo *multisample)
 {
    const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
@@ -1069,12 +1069,21 @@ emit_3dstate_wm(struct anv_pipeline *pipeline,
          /* FIXME: This needs a lot more work, cf gen7 upload_wm_state(). */
          wm.ThreadDispatchEnable          = true;
 
-         wm.PixelShaderKillsPixel         = wm_prog_data->uses_kill;
          wm.PixelShaderComputedDepthMode  = wm_prog_data->computed_depth_mode;
          wm.PixelShaderUsesSourceDepth    = wm_prog_data->uses_src_depth;
          wm.PixelShaderUsesSourceW        = wm_prog_data->uses_src_w;
          wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
 
+         /* If the subpass has a depth or stencil self-dependency, then we
+          * need to force the hardware to do the depth/stencil write *after*
+          * fragment shader execution.  Otherwise, the writes may hit memory
+          * before we get around to fetching from the input attachment and we
+          * may get the depth or stencil value from the current draw rather
+          * than the previous one.
+          */
+         wm.PixelShaderKillsPixel         = subpass->has_ds_self_dep ||
+                                            wm_prog_data->uses_kill;
+
          if (samples > 1) {
             wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
             if (wm_prog_data->persample_dispatch) {
@@ -1163,7 +1172,8 @@ emit_3dstate_ps(struct anv_pipeline *pipeline)
 
 #if GEN_GEN >= 8
 static void
-emit_3dstate_ps_extra(struct anv_pipeline *pipeline)
+emit_3dstate_ps_extra(struct anv_pipeline *pipeline,
+                      struct anv_subpass *subpass)
 {
    const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
 
@@ -1177,11 +1187,19 @@ emit_3dstate_ps_extra(struct anv_pipeline *pipeline)
       ps.AttributeEnable               = wm_prog_data->num_varying_inputs > 0;
       ps.oMaskPresenttoRenderTarget    = wm_prog_data->uses_omask;
       ps.PixelShaderIsPerSample        = wm_prog_data->persample_dispatch;
-      ps.PixelShaderKillsPixel         = wm_prog_data->uses_kill;
       ps.PixelShaderComputedDepthMode  = wm_prog_data->computed_depth_mode;
       ps.PixelShaderUsesSourceDepth    = wm_prog_data->uses_src_depth;
       ps.PixelShaderUsesSourceW        = wm_prog_data->uses_src_w;
 
+      /* If the subpass has a depth or stencil self-dependency, then we need
+       * to force the hardware to do the depth/stencil write *after* fragment
+       * shader execution.  Otherwise, the writes may hit memory before we get
+       * around to fetching from the input attachment and we may get the depth
+       * or stencil value from the current draw rather than the previous one.
+       */
+      ps.PixelShaderKillsPixel         = subpass->has_ds_self_dep ||
+                                         wm_prog_data->uses_kill;
+
 #if GEN_GEN >= 9
       ps.PixelShaderPullsBary    = wm_prog_data->pulls_bary;
       ps.InputCoverageMaskState  = wm_prog_data->uses_sample_mask ?
@@ -1267,10 +1285,10 @@ genX(graphics_pipeline_create)(
    emit_3dstate_vs(pipeline);
    emit_3dstate_gs(pipeline);
    emit_3dstate_sbe(pipeline);
-   emit_3dstate_wm(pipeline, pCreateInfo->pMultisampleState);
+   emit_3dstate_wm(pipeline, subpass, pCreateInfo->pMultisampleState);
    emit_3dstate_ps(pipeline);
 #if GEN_GEN >= 8
-   emit_3dstate_ps_extra(pipeline);
+   emit_3dstate_ps_extra(pipeline, subpass);
    emit_3dstate_vf_topology(pipeline);
 #endif