intel/blorp: Make the MOCS setting part of blorp_address
[mesa.git] / src / intel / vulkan / gen8_cmd_buffer.c
index 0628f3a0dd1539164db5cd67382c9efe68c33145..751212b8f43d924c5260e817da3ae0de7d77b570 100644 (file)
@@ -49,10 +49,10 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
       struct GENX(SF_CLIP_VIEWPORT) sf_clip_viewport = {
          .ViewportMatrixElementm00 = vp->width / 2,
          .ViewportMatrixElementm11 = vp->height / 2,
-         .ViewportMatrixElementm22 = 1.0,
+         .ViewportMatrixElementm22 = vp->maxDepth - vp->minDepth,
          .ViewportMatrixElementm30 = vp->x + vp->width / 2,
          .ViewportMatrixElementm31 = vp->y + vp->height / 2,
-         .ViewportMatrixElementm32 = 0.0,
+         .ViewportMatrixElementm32 = vp->minDepth,
          .XMinClipGuardband = -1.0f,
          .XMaxClipGuardband = 1.0f,
          .YMinClipGuardband = -1.0f,
@@ -67,8 +67,7 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
                                  &sf_clip_viewport);
    }
 
-   if (!cmd_buffer->device->info.has_llc)
-      anv_state_clflush(sf_clip_state);
+   anv_state_flush(cmd_buffer->device, sf_clip_state);
 
    anv_batch_emit(&cmd_buffer->batch,
                   GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) {
@@ -96,8 +95,7 @@ gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer,
       GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport);
    }
 
-   if (!cmd_buffer->device->info.has_llc)
-      anv_state_clflush(cc_state);
+   anv_state_flush(cmd_buffer->device, cc_state);
 
    anv_batch_emit(&cmd_buffer->batch,
                   GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
@@ -106,67 +104,42 @@ gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer,
 }
 #endif
 
-static void
-__emit_genx_sf_state(struct anv_cmd_buffer *cmd_buffer)
-{
-      uint32_t sf_dw[GENX(3DSTATE_SF_length)];
-      struct GENX(3DSTATE_SF) sf = {
-         GENX(3DSTATE_SF_header),
-         .LineWidth = cmd_buffer->state.dynamic.line_width,
-      };
-      GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);
-      /* FIXME: gen9.fs */
-      anv_batch_emit_merge(&cmd_buffer->batch, sf_dw,
-                           cmd_buffer->state.pipeline->gen8.sf);
-}
-
-void
-gen9_emit_sf_state(struct anv_cmd_buffer *cmd_buffer);
-
-#if GEN_GEN == 9
-
-void
-gen9_emit_sf_state(struct anv_cmd_buffer *cmd_buffer)
-{
-   __emit_genx_sf_state(cmd_buffer);
-}
-
-#endif
-
-#if GEN_GEN == 8
-
-static void
-__emit_sf_state(struct anv_cmd_buffer *cmd_buffer)
-{
-   if (cmd_buffer->device->info.is_cherryview)
-      gen9_emit_sf_state(cmd_buffer);
-   else
-      __emit_genx_sf_state(cmd_buffer);
-}
-
-#else
-
-static void
-__emit_sf_state(struct anv_cmd_buffer *cmd_buffer)
-{
-   __emit_genx_sf_state(cmd_buffer);
-}
-
-#endif
-
 void
 genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
 {
-#if GEN_GEN == 8
    if (cmd_buffer->state.pma_fix_enabled == enable)
       return;
 
+   cmd_buffer->state.pma_fix_enabled = enable;
+
+   /* According to the Broadwell PIPE_CONTROL documentation, software should
+    * emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set
+    * prior to the LRI.  If stencil buffer writes are enabled, then a Render
+    * Cache Flush is also necessary.
+    *
+    * The Skylake docs say to use a depth stall rather than a command
+    * streamer stall.  However, the hardware seems to violently disagree.
+    * A full command streamer stall seems to be needed in both cases.
+    */
    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
       pc.DepthCacheFlushEnable = true;
       pc.CommandStreamerStallEnable = true;
       pc.RenderTargetCacheFlushEnable = true;
    }
 
+#if GEN_GEN == 9
+
+   uint32_t cache_mode;
+   anv_pack_struct(&cache_mode, GENX(CACHE_MODE_0),
+                   .STCPMAOptimizationEnable = enable,
+                   .STCPMAOptimizationEnableMask = true);
+   anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
+      lri.RegisterOffset   = GENX(CACHE_MODE_0_num);
+      lri.DataDWord        = cache_mode;
+   }
+
+#elif GEN_GEN == 8
+
    uint32_t cache_mode;
    anv_pack_struct(&cache_mode, GENX(CACHE_MODE_1),
                    .NPPMAFixEnable = enable,
@@ -178,21 +151,23 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
       lri.DataDWord        = cache_mode;
    }
 
+#endif /* GEN_GEN == 8 */
+
    /* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
     * Flush bits is often necessary.  We do it regardless because it's easier.
     * The render cache flush is also necessary if stencil writes are enabled.
+    *
+    * Again, the Skylake docs give a different set of flushes but the BDW
+    * flushes seem to work just as well.
     */
    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
       pc.DepthStallEnable = true;
       pc.DepthCacheFlushEnable = true;
       pc.RenderTargetCacheFlushEnable = true;
    }
-
-   cmd_buffer->state.pma_fix_enabled = enable;
-#endif /* GEN_GEN == 8 */
 }
 
-static inline bool
+UNUSED static bool
 want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer)
 {
    assert(GEN_GEN == 8);
@@ -281,6 +256,128 @@ want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer)
           wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
 }
 
+UNUSED static bool
+want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer)
+{
+   if (GEN_GEN > 9)
+      return false;
+   assert(GEN_GEN == 9);
+
+   /* From the Skylake PRM Vol. 2c CACHE_MODE_1::STC PMA Optimization Enable:
+    *
+    *    Clearing this bit will force the STC cache to wait for pending
+    *    retirement of pixels at the HZ-read stage and do the STC-test for
+    *    Non-promoted, R-computed and Computed depth modes instead of
+    *    postponing the STC-test to RCPFE.
+    *
+    *    STC_TEST_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
+    *                  3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
+    *
+    *    STC_WRITE_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
+    *                   (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
+    *                    3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
+    *
+    *    COMP_STC_EN = STC_TEST_EN &&
+    *                  3DSTATE_PS_EXTRA::PixelShaderComputesStencil
+    *
+    *    SW parses the pipeline states to generate the following logical
+    *    signal indicating if PMA FIX can be enabled.
+    *
+    *    STC_PMA_OPT =
+    *       3DSTATE_WM::ForceThreadDispatch != 1 &&
+    *       !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
+    *       3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&
+    *       3DSTATE_DEPTH_BUFFER::HIZ Enable &&
+    *       !(3DSTATE_WM::EDSC_Mode == 2) &&
+    *       3DSTATE_PS_EXTRA::PixelShaderValid &&
+    *       !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
+    *         3DSTATE_WM_HZ_OP::DepthBufferResolve ||
+    *         3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
+    *         3DSTATE_WM_HZ_OP::StencilBufferClear) &&
+    *       (COMP_STC_EN || STC_WRITE_EN) &&
+    *       ((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
+    *         3DSTATE_WM::ForceKillPix == ON ||
+    *         3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
+    *         3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
+    *         3DSTATE_PS_BLEND::AlphaTestEnable ||
+    *         3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
+    *        (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
+    */
+
+   /* These are always true:
+    *    3DSTATE_WM::ForceThreadDispatch != 1 &&
+    *    !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
+    */
+
+   /* We only enable the PMA fix if we know for certain that HiZ is enabled.
+    * If we don't know whether HiZ is enabled or not, we disable the PMA fix
+    * and there is no harm.
+    *
+    * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
+    * 3DSTATE_DEPTH_BUFFER::HIZ Enable
+    */
+   if (!cmd_buffer->state.hiz_enabled)
+      return false;
+
+   /* We can't possibly know if HiZ is enabled without the framebuffer */
+   assert(cmd_buffer->state.framebuffer);
+
+   /* HiZ is enabled so we had better have a depth buffer with HiZ */
+   const struct anv_image_view *ds_iview =
+      anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
+   assert(ds_iview && ds_iview->image->planes[0].aux_usage == ISL_AUX_USAGE_HIZ);
+
+   /* 3DSTATE_PS_EXTRA::PixelShaderValid */
+   struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
+   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
+      return false;
+
+   /* !(3DSTATE_WM::EDSC_Mode == 2) */
+   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
+   if (wm_prog_data->early_fragment_tests)
+      return false;
+
+   /* We never use anv_pipeline for HiZ ops so this is trivially true:
+   *    !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
+    *      3DSTATE_WM_HZ_OP::DepthBufferResolve ||
+    *      3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
+    *      3DSTATE_WM_HZ_OP::StencilBufferClear)
+    */
+
+   /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
+    * 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
+    */
+   const bool stc_test_en =
+      (ds_iview->image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
+      pipeline->stencil_test_enable;
+
+   /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
+    * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
+    *  3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
+    */
+   const bool stc_write_en =
+      (ds_iview->image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
+      pipeline->writes_stencil;
+
+   /* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */
+   const bool comp_stc_en = stc_test_en && wm_prog_data->computed_stencil;
+
+   /* COMP_STC_EN || STC_WRITE_EN */
+   if (!(comp_stc_en || stc_write_en))
+      return false;
+
+   /* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
+    *  3DSTATE_WM::ForceKillPix == ON ||
+    *  3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
+    *  3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
+    *  3DSTATE_PS_BLEND::AlphaTestEnable ||
+    *  3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
+    * (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF)
+    */
+   return pipeline->kill_pixel ||
+          wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
+}
+
 void
 genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
 {
@@ -288,7 +385,22 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
 
    if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                   ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) {
-      __emit_sf_state(cmd_buffer);
+      uint32_t sf_dw[GENX(3DSTATE_SF_length)];
+      struct GENX(3DSTATE_SF) sf = {
+         GENX(3DSTATE_SF_header),
+      };
+#if GEN_GEN == 8
+      if (cmd_buffer->device->info.is_cherryview) {
+         sf.CHVLineWidth = cmd_buffer->state.dynamic.line_width;
+      } else {
+         sf.LineWidth = cmd_buffer->state.dynamic.line_width;
+      }
+#else
+      sf.LineWidth = cmd_buffer->state.dynamic.line_width,
+#endif
+      GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);
+      anv_batch_emit_merge(&cmd_buffer->batch, sf_dw,
+                           cmd_buffer->state.pipeline->gen8.sf);
    }
 
    if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
@@ -324,12 +436,11 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
          .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2],
          .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3],
          .StencilReferenceValue = d->stencil_reference.front & 0xff,
-         .BackFaceStencilReferenceValue = d->stencil_reference.back & 0xff,
+         .BackfaceStencilReferenceValue = d->stencil_reference.back & 0xff,
       };
       GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
 
-      if (!cmd_buffer->device->info.has_llc)
-         anv_state_clflush(cc_state);
+      anv_state_flush(cmd_buffer->device, cc_state);
 
       anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
          ccp.ColorCalcStatePointer        = cc_state.offset;
@@ -370,33 +481,33 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
    if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) {
       struct anv_state cc_state =
          anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
-                                            GEN9_COLOR_CALC_STATE_length * 4,
+                                            GENX(COLOR_CALC_STATE_length) * 4,
                                             64);
-      struct GEN9_COLOR_CALC_STATE cc = {
+      struct GENX(COLOR_CALC_STATE) cc = {
          .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0],
          .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1],
          .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2],
          .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3],
       };
-      GEN9_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc);
+      GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
 
-      if (!cmd_buffer->device->info.has_llc)
-         anv_state_clflush(cc_state);
+      anv_state_flush(cmd_buffer->device, cc_state);
 
-      anv_batch_emit(&cmd_buffer->batch, GEN9_3DSTATE_CC_STATE_POINTERS, ccp) {
+      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
          ccp.ColorCalcStatePointer = cc_state.offset;
          ccp.ColorCalcStatePointerValid = true;
       }
    }
 
    if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
+                                  ANV_CMD_DIRTY_RENDER_TARGETS |
                                   ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
                                   ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |
                                   ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
-      uint32_t dwords[GEN9_3DSTATE_WM_DEPTH_STENCIL_length];
+      uint32_t dwords[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];
       struct anv_dynamic_state *d = &cmd_buffer->state.dynamic;
-      struct GEN9_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = {
-         GEN9_3DSTATE_WM_DEPTH_STENCIL_header,
+      struct GENX(3DSTATE_WM_DEPTH_STENCIL) wm_depth_stencil = {
+         GENX(3DSTATE_WM_DEPTH_STENCIL_header),
 
          .StencilTestMask = d->stencil_compare_mask.front & 0xff,
          .StencilWriteMask = d->stencil_write_mask.front & 0xff,
@@ -411,10 +522,13 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
             (d->stencil_write_mask.front || d->stencil_write_mask.back) &&
             pipeline->writes_stencil,
       };
-      GEN9_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, dwords, &wm_depth_stencil);
+      GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, dwords, &wm_depth_stencil);
 
       anv_batch_emit_merge(&cmd_buffer->batch, dwords,
                            pipeline->gen9.wm_depth_stencil);
+
+      genX(cmd_buffer_enable_pma_fix)(cmd_buffer,
+                                      want_stencil_pma_fix(cmd_buffer));
    }
 #endif
 
@@ -497,7 +611,7 @@ void genX(CmdSetEvent)(
       pc.DestinationAddressType  = DAT_PPGTT,
       pc.PostSyncOperation       = WriteImmediateData,
       pc.Address = (struct anv_address) {
-         &cmd_buffer->device->dynamic_state_block_pool.bo,
+         &cmd_buffer->device->dynamic_state_pool.block_pool.bo,
          event->state.offset
       };
       pc.ImmediateData           = VK_EVENT_SET;
@@ -521,7 +635,7 @@ void genX(CmdResetEvent)(
       pc.DestinationAddressType  = DAT_PPGTT;
       pc.PostSyncOperation       = WriteImmediateData;
       pc.Address = (struct anv_address) {
-         &cmd_buffer->device->dynamic_state_block_pool.bo,
+         &cmd_buffer->device->dynamic_state_pool.block_pool.bo,
          event->state.offset
       };
       pc.ImmediateData           = VK_EVENT_RESET;
@@ -550,7 +664,7 @@ void genX(CmdWaitEvents)(
          sem.CompareOperation    = COMPARE_SAD_EQUAL_SDD,
          sem.SemaphoreDataDword  = VK_EVENT_SET,
          sem.SemaphoreAddress = (struct anv_address) {
-            &cmd_buffer->device->dynamic_state_block_pool.bo,
+            &cmd_buffer->device->dynamic_state_pool.block_pool.bo,
             event->state.offset
          };
       }