From b95b0e2918c052068caeb4f6c2802ba89be043a3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 18 May 2018 16:39:21 -0700 Subject: [PATCH] intel/anv,blorp,i965: Implement the SKL 16x MSAA SIMD32 workaround Reviewed-by: Kenneth Graunke --- src/intel/blorp/blorp_genX_exec.h | 14 +++++++++++++ src/intel/vulkan/genX_pipeline.c | 20 +++++++++++++++++-- src/mesa/drivers/dri/i965/genX_state_upload.c | 17 ++++++++++++++++ 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h index 13bdd851e94..8bd9174b677 100644 --- a/src/intel/blorp/blorp_genX_exec.h +++ b/src/intel/blorp/blorp_genX_exec.h @@ -767,6 +767,20 @@ blorp_emit_ps_config(struct blorp_batch *batch, ps._16PixelDispatchEnable = prog_data->dispatch_16; ps._32PixelDispatchEnable = prog_data->dispatch_32; + /* From the Sky Lake PRM 3DSTATE_PS::32 Pixel Dispatch Enable: + * + * "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, SIMD32 + * Dispatch must not be enabled for PER_PIXEL dispatch mode." + * + * Since 16x MSAA is first introduced on SKL, we don't need to apply + * the workaround on any older hardware. + */ + if (GEN_GEN >= 9 && !prog_data->persample_dispatch && + params->num_samples == 16) { + assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable); + ps._32PixelDispatchEnable = false; + } + ps.DispatchGRFStartRegisterForConstantSetupData0 = brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0); ps.DispatchGRFStartRegisterForConstantSetupData1 = diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 15b1e0b3880..197899fb2e3 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -1445,7 +1445,8 @@ is_dual_src_blend_factor(VkBlendFactor factor) static void emit_3dstate_ps(struct anv_pipeline *pipeline, - const VkPipelineColorBlendStateCreateInfo *blend) + const VkPipelineColorBlendStateCreateInfo *blend, + const VkPipelineMultisampleStateCreateInfo *multisample) { MAYBE_UNUSED const struct gen_device_info *devinfo = &pipeline->device->info; const struct anv_shader_bin *fs_bin = @@ -1492,6 +1493,20 @@ emit_3dstate_ps(struct anv_pipeline *pipeline, ps._16PixelDispatchEnable = wm_prog_data->dispatch_16; ps._32PixelDispatchEnable = wm_prog_data->dispatch_32; + /* From the Sky Lake PRM 3DSTATE_PS::32 Pixel Dispatch Enable: + * + * "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, SIMD32 + * Dispatch must not be enabled for PER_PIXEL dispatch mode." + * + * Since 16x MSAA is first introduced on SKL, we don't need to apply + * the workaround on any older hardware. + */ + if (GEN_GEN >= 9 && !wm_prog_data->persample_dispatch && + multisample && multisample->rasterizationSamples == 16) { + assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable); + ps._32PixelDispatchEnable = false; + } + ps.KernelStartPointer0 = fs_bin->kernel.offset + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0); ps.KernelStartPointer1 = fs_bin->kernel.offset + @@ -1733,7 +1748,8 @@ genX(graphics_pipeline_create)( emit_3dstate_sbe(pipeline); emit_3dstate_wm(pipeline, subpass, pCreateInfo->pColorBlendState, pCreateInfo->pMultisampleState); - emit_3dstate_ps(pipeline, pCreateInfo->pColorBlendState); + emit_3dstate_ps(pipeline, pCreateInfo->pColorBlendState, + pCreateInfo->pMultisampleState); #if GEN_GEN >= 8 emit_3dstate_ps_extra(pipeline, subpass, pCreateInfo->pColorBlendState); emit_3dstate_vf_topology(pipeline); diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index 42cd08ceba9..b279f01e1a1 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -4032,6 +4032,22 @@ genX(upload_ps)(struct brw_context *brw) ps._16PixelDispatchEnable = prog_data->dispatch_16; ps._32PixelDispatchEnable = prog_data->dispatch_32; + /* From the Sky Lake PRM 3DSTATE_PS::32 Pixel Dispatch Enable: + * + * "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, SIMD32 + * Dispatch must not be enabled for PER_PIXEL dispatch mode." + * + * Since 16x MSAA is first introduced on SKL, we don't need to apply + * the workaround on any older hardware. + * + * BRW_NEW_NUM_SAMPLES + */ + if (GEN_GEN >= 9 && !prog_data->persample_dispatch && + brw->num_samples == 16) { + assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable); + ps._32PixelDispatchEnable = false; + } + ps.DispatchGRFStartRegisterForConstantSetupData0 = brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0); ps.DispatchGRFStartRegisterForConstantSetupData1 = @@ -4057,6 +4073,7 @@ genX(upload_ps)(struct brw_context *brw) static const struct brw_tracked_state genX(ps_state) = { .dirty = { .mesa = _NEW_MULTISAMPLE | + (GEN_GEN >= 9 ? BRW_NEW_NUM_SAMPLES : 0) | (GEN_GEN < 8 ? _NEW_BUFFERS | _NEW_COLOR : 0), -- 2.30.2