intel/anv,blorp,i965: Implement the SKL 16x MSAA SIMD32 workaround
authorJason Ekstrand <jason.ekstrand@intel.com>
Fri, 18 May 2018 23:39:21 +0000 (16:39 -0700)
committerJason Ekstrand <jason.ekstrand@intel.com>
Thu, 28 Jun 2018 20:25:18 +0000 (13:25 -0700)
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/intel/blorp/blorp_genX_exec.h
src/intel/vulkan/genX_pipeline.c
src/mesa/drivers/dri/i965/genX_state_upload.c

index 13bdd851e94a103fefaa88f5f81e54e1217dbea8..8bd9174b677ba4a24efd6f5c9840e4fdbbb8ad2c 100644 (file)
@@ -767,6 +767,20 @@ blorp_emit_ps_config(struct blorp_batch *batch,
          ps._16PixelDispatchEnable = prog_data->dispatch_16;
          ps._32PixelDispatchEnable = prog_data->dispatch_32;
 
+         /* From the Sky Lake PRM 3DSTATE_PS::32 Pixel Dispatch Enable:
+          *
+          *    "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, SIMD32
+          *    Dispatch must not be enabled for PER_PIXEL dispatch mode."
+          *
+          * Since 16x MSAA is first introduced on SKL, we don't need to apply
+          * the workaround on any older hardware.
+          */
+         if (GEN_GEN >= 9 && !prog_data->persample_dispatch &&
+             params->num_samples == 16) {
+            assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable);
+            ps._32PixelDispatchEnable = false;
+         }
+
          ps.DispatchGRFStartRegisterForConstantSetupData0 =
             brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
          ps.DispatchGRFStartRegisterForConstantSetupData1 =
index 15b1e0b3880c4223aa0369e40cb690b2e71c7632..197899fb2e30be2dc9f8e8b2c88feb2a5b0c3d62 100644 (file)
@@ -1445,7 +1445,8 @@ is_dual_src_blend_factor(VkBlendFactor factor)
 
 static void
 emit_3dstate_ps(struct anv_pipeline *pipeline,
-                const VkPipelineColorBlendStateCreateInfo *blend)
+                const VkPipelineColorBlendStateCreateInfo *blend,
+                const VkPipelineMultisampleStateCreateInfo *multisample)
 {
    MAYBE_UNUSED const struct gen_device_info *devinfo = &pipeline->device->info;
    const struct anv_shader_bin *fs_bin =
@@ -1492,6 +1493,20 @@ emit_3dstate_ps(struct anv_pipeline *pipeline,
       ps._16PixelDispatchEnable     = wm_prog_data->dispatch_16;
       ps._32PixelDispatchEnable     = wm_prog_data->dispatch_32;
 
+      /* From the Sky Lake PRM 3DSTATE_PS::32 Pixel Dispatch Enable:
+       *
+       *    "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, SIMD32
+       *    Dispatch must not be enabled for PER_PIXEL dispatch mode."
+       *
+       * Since 16x MSAA is first introduced on SKL, we don't need to apply
+       * the workaround on any older hardware.
+       */
+      if (GEN_GEN >= 9 && !wm_prog_data->persample_dispatch &&
+          multisample && multisample->rasterizationSamples == 16) {
+         assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable);
+         ps._32PixelDispatchEnable = false;
+      }
+
       ps.KernelStartPointer0 = fs_bin->kernel.offset +
                                brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
       ps.KernelStartPointer1 = fs_bin->kernel.offset +
@@ -1733,7 +1748,8 @@ genX(graphics_pipeline_create)(
    emit_3dstate_sbe(pipeline);
    emit_3dstate_wm(pipeline, subpass, pCreateInfo->pColorBlendState,
                    pCreateInfo->pMultisampleState);
-   emit_3dstate_ps(pipeline, pCreateInfo->pColorBlendState);
+   emit_3dstate_ps(pipeline, pCreateInfo->pColorBlendState,
+                   pCreateInfo->pMultisampleState);
 #if GEN_GEN >= 8
    emit_3dstate_ps_extra(pipeline, subpass, pCreateInfo->pColorBlendState);
    emit_3dstate_vf_topology(pipeline);
index 42cd08ceba9f1f4d9f2bec82d479ff6ca6e62178..b279f01e1a181230574c9df7afc84a3626e75a3a 100644 (file)
@@ -4032,6 +4032,22 @@ genX(upload_ps)(struct brw_context *brw)
       ps._16PixelDispatchEnable = prog_data->dispatch_16;
       ps._32PixelDispatchEnable = prog_data->dispatch_32;
 
+      /* From the Sky Lake PRM 3DSTATE_PS::32 Pixel Dispatch Enable:
+       *
+       *    "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, SIMD32
+       *    Dispatch must not be enabled for PER_PIXEL dispatch mode."
+       *
+       * Since 16x MSAA is first introduced on SKL, we don't need to apply
+       * the workaround on any older hardware.
+       *
+       * BRW_NEW_NUM_SAMPLES
+       */
+      if (GEN_GEN >= 9 && !prog_data->persample_dispatch &&
+          brw->num_samples == 16) {
+         assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable);
+         ps._32PixelDispatchEnable = false;
+      }
+
       ps.DispatchGRFStartRegisterForConstantSetupData0 =
          brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
       ps.DispatchGRFStartRegisterForConstantSetupData1 =
@@ -4057,6 +4073,7 @@ genX(upload_ps)(struct brw_context *brw)
 static const struct brw_tracked_state genX(ps_state) = {
    .dirty = {
       .mesa  = _NEW_MULTISAMPLE |
+               (GEN_GEN >= 9 ? BRW_NEW_NUM_SAMPLES : 0) |
                (GEN_GEN < 8 ? _NEW_BUFFERS |
                               _NEW_COLOR
                             : 0),