From: Jason Ekstrand Date: Sat, 3 Aug 2019 16:37:34 +0000 (-0500) Subject: iris: Fix handling of SIMD32 fragment shaders X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=aebca3961b4161574b889201f48c38bd73e2f7c3;p=mesa.git iris: Fix handling of SIMD32 fragment shaders The brw_wm_prog_data_dispatch_grf_start_reg and _prog_offset helpers read the _NPixelDispatchEnable fields from 3DSTATE_PS to figure out which bits to pull out of the prog data and stuff where. Therefore, they need to be called with the final set of _NPixelDispatchEnable bits after we've done the workaround for SIMD32 and 16x MSAA. Otherwise, if you end up with a somewhat odd combination of enables, the GRF start reg and KSP data ends up in the wrong slots. In particular, running SIMD32-only is broken but several other combinations are as well. Fixes: 5445c176e27ba "iris: Disable SIMD32 when using a 16x MSAA..." Reviewed-by: Kenneth Graunke --- diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 2c3b18b19bc..13522e5d0c4 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -3645,23 +3645,6 @@ iris_store_fs_state(struct iris_context *ice, */ ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE; - ps._8PixelDispatchEnable = wm_prog_data->dispatch_8; - ps._16PixelDispatchEnable = wm_prog_data->dispatch_16; - /* ps._32PixelDispatchEnable is filled in at draw time. */ - - ps.DispatchGRFStartRegisterForConstantSetupData0 = - brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0); - ps.DispatchGRFStartRegisterForConstantSetupData1 = - brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1); - ps.DispatchGRFStartRegisterForConstantSetupData2 = - brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2); - - ps.KernelStartPointer0 = - KSP(shader) + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0); - ps.KernelStartPointer1 = - KSP(shader) + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1); - ps.KernelStartPointer2 = - KSP(shader) + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2); if (prog_data->total_scratch) { struct iris_bo *bo = @@ -3686,8 +3669,6 @@ iris_store_fs_state(struct iris_context *ice, #if GEN_GEN >= 9 psx.PixelShaderPullsBary = wm_prog_data->pulls_bary; psx.PixelShaderComputesStencil = wm_prog_data->computed_stencil; -#else - psx.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask; #endif } } @@ -4655,47 +4636,72 @@ iris_upload_dirty_render_state(struct iris_context *ice, iris_get_scratch_space(ice, prog_data->total_scratch, stage); iris_use_pinned_bo(batch, bo, true); } -#if GEN_GEN >= 9 - if (stage == MESA_SHADER_FRAGMENT && wm_prog_data->uses_sample_mask) { - uint32_t *shader_ps = (uint32_t *) shader->derived_data; - uint32_t *shader_psx = shader_ps + GENX(3DSTATE_PS_length); - uint32_t ps_state[GENX(3DSTATE_PS_length)] = {0}; - uint32_t psx_state[GENX(3DSTATE_PS_EXTRA_length)] = {0}; - struct iris_rasterizer_state *cso = ice->state.cso_rast; + + if (stage == MESA_SHADER_FRAGMENT) { + UNUSED struct iris_rasterizer_state *cso = ice->state.cso_rast; struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; - /* The docs for 3DSTATE_PS::32 Pixel Dispatch Enable say: - * - * "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, - * SIMD32 Dispatch must not be enabled for PER_PIXEL dispatch - * mode." - * - * 16x MSAA only exists on Gen9+, so we can skip this on Gen8. - */ - iris_pack_command(GENX(3DSTATE_PS), &ps_state, ps) { - ps._32PixelDispatchEnable = wm_prog_data->dispatch_32 && - (cso_fb->samples != 16 || wm_prog_data->persample_dispatch); + uint32_t ps_state[GENX(3DSTATE_PS_length)] = {0}; + iris_pack_command(GENX(3DSTATE_PS), ps_state, ps) { + ps._8PixelDispatchEnable = wm_prog_data->dispatch_8; + ps._16PixelDispatchEnable = wm_prog_data->dispatch_16; + ps._32PixelDispatchEnable = wm_prog_data->dispatch_32; + + /* The docs for 3DSTATE_PS::32 Pixel Dispatch Enable say: + * + * "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, + * SIMD32 Dispatch must not be enabled for PER_PIXEL dispatch + * mode." + * + * 16x MSAA only exists on Gen9+, so we can skip this on Gen8. + */ + if (GEN_GEN >= 9 && cso_fb->samples == 16 && + !wm_prog_data->persample_dispatch) { + assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable); + ps._32PixelDispatchEnable = false; + } + + ps.DispatchGRFStartRegisterForConstantSetupData0 = + brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0); + ps.DispatchGRFStartRegisterForConstantSetupData1 = + brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1); + ps.DispatchGRFStartRegisterForConstantSetupData2 = + brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2); + + ps.KernelStartPointer0 = KSP(shader) + + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0); + ps.KernelStartPointer1 = KSP(shader) + + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1); + ps.KernelStartPointer2 = KSP(shader) + + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2); } - iris_pack_command(GENX(3DSTATE_PS_EXTRA), &psx_state, psx) { + uint32_t psx_state[GENX(3DSTATE_PS_EXTRA_length)] = {0}; + iris_pack_command(GENX(3DSTATE_PS_EXTRA), psx_state, psx) { +#if GEN_GEN >= 9 if (wm_prog_data->post_depth_coverage) psx.InputCoverageMaskState = ICMS_DEPTH_COVERAGE; - else if (wm_prog_data->inner_coverage && cso->conservative_rasterization) + else if (wm_prog_data->inner_coverage && + cso->conservative_rasterization) psx.InputCoverageMaskState = ICMS_INNER_CONSERVATIVE; else psx.InputCoverageMaskState = ICMS_NORMAL; +#else + psx.PixelShaderUsesInputCoverageMask = + wm_prog_data->uses_sample_mask; +#endif } + uint32_t *shader_ps = (uint32_t *) shader->derived_data; + uint32_t *shader_psx = shader_ps + GENX(3DSTATE_PS_length); iris_emit_merge(batch, shader_ps, ps_state, GENX(3DSTATE_PS_length)); - iris_emit_merge(batch, - shader_psx, - psx_state, + iris_emit_merge(batch, shader_psx, psx_state, GENX(3DSTATE_PS_EXTRA_length)); - } else -#endif + } else { iris_batch_emit(batch, shader->derived_data, iris_derived_program_state_size(stage)); + } } else { if (stage == MESA_SHADER_TESS_EVAL) { iris_emit_cmd(batch, GENX(3DSTATE_HS), hs);