X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fblorp%2Fblorp_genX_exec.h;h=6e74683f4e0c4d533e4e34af9a015d5aa8e60d9b;hb=39ad0c2af8b40c728a91bebf05b365803d68022e;hp=84b7cac7e679c4d3515d5f67b73891fa21915148;hpb=e1bdb127b6875df602bd736465d597725f326621;p=mesa.git diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h index 84b7cac7e67..6e74683f4e0 100644 --- a/src/intel/blorp/blorp_genX_exec.h +++ b/src/intel/blorp/blorp_genX_exec.h @@ -27,6 +27,7 @@ #include "blorp_priv.h" #include "dev/gen_device_info.h" #include "common/gen_sample_positions.h" +#include "common/gen_l3_config.h" #include "genxml/gen_macros.h" /** @@ -65,10 +66,8 @@ blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch, uint32_t *sizes, unsigned num_vbs); -#if GEN_GEN >= 8 -static struct blorp_address +UNUSED static struct blorp_address blorp_get_workaround_page(struct blorp_batch *batch); -#endif static void blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries, @@ -92,9 +91,14 @@ static struct blorp_address blorp_get_surface_base_address(struct blorp_batch *batch); #endif +#if GEN_GEN >= 7 +static const struct gen_l3_config * +blorp_get_l3_config(struct blorp_batch *batch); +# else static void blorp_emit_urb_config(struct blorp_batch *batch, unsigned vs_entry_size, unsigned sf_entry_size); +#endif static void blorp_emit_pipeline(struct blorp_batch *batch, @@ -185,7 +189,8 @@ _blorp_combine_address(struct blorp_batch *batch, void *location, */ static void emit_urb_config(struct blorp_batch *batch, - const struct blorp_params *params) + const struct blorp_params *params, + enum gen_urb_deref_block_size *deref_block_size) { /* Once vertex fetcher has written full VUE entries with complete * header the space requirement is as follows per vertex (in bytes): @@ -207,7 +212,43 @@ emit_urb_config(struct blorp_batch *batch, const unsigned sf_entry_size = params->sf_prog_data ? 
params->sf_prog_data->urb_entry_size : 0; +#if GEN_GEN >= 7 + assert(sf_entry_size == 0); + const unsigned entry_size[4] = { vs_entry_size, 1, 1, 1 }; + + unsigned entries[4], start[4]; + gen_get_urb_config(batch->blorp->compiler->devinfo, + blorp_get_l3_config(batch), + false, false, entry_size, + entries, start, deref_block_size); + +#if GEN_GEN == 7 && !GEN_IS_HASWELL + /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1: + * + * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall + * needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS, + * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS, + * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL + * needs to be sent before any combination of VS associated 3DSTATE." + */ + blorp_emit(batch, GENX(PIPE_CONTROL), pc) { + pc.DepthStallEnable = true; + pc.PostSyncOperation = WriteImmediateData; + pc.Address = blorp_get_workaround_page(batch); + } +#endif + + for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) { + blorp_emit(batch, GENX(3DSTATE_URB_VS), urb) { + urb._3DCommandSubOpcode += i; + urb.VSURBStartingAddress = start[i]; + urb.VSURBEntryAllocationSize = entry_size[i] - 1; + urb.VSNumberofURBEntries = entries[i]; + } + } +#else /* GEN_GEN < 7 */ blorp_emit_urb_config(batch, vs_entry_size, sf_entry_size); +#endif } #if GEN_GEN >= 7 @@ -646,7 +687,8 @@ blorp_emit_vs_config(struct blorp_batch *batch, static void blorp_emit_sf_config(struct blorp_batch *batch, - const struct blorp_params *params) + const struct blorp_params *params, + enum gen_urb_deref_block_size urb_deref_block_size) { const struct brw_wm_prog_data *prog_data = params->wm_prog_data; @@ -673,7 +715,7 @@ blorp_emit_sf_config(struct blorp_batch *batch, blorp_emit(batch, GENX(3DSTATE_SF), sf) { #if GEN_GEN >= 12 - sf.DerefBlockSize = PerPolyDerefMode; + sf.DerefBlockSize = urb_deref_block_size; #endif } @@ -860,6 +902,10 @@ blorp_emit_ps_config(struct blorp_batch *batch, psx.PixelShaderValid = true; 
psx.AttributeEnable = prog_data->num_varying_inputs > 0; psx.PixelShaderIsPerSample = prog_data->persample_dispatch; + psx.PixelShaderComputedDepthMode = prog_data->computed_depth_mode; +#if GEN_GEN >= 9 + psx.PixelShaderComputesStencil = prog_data->computed_stencil; +#endif } if (params->src.enabled) @@ -885,8 +931,10 @@ blorp_emit_ps_config(struct blorp_batch *batch, unreachable("not reached"); } - if (prog_data) + if (prog_data) { wm.ThreadDispatchEnable = true; + wm.PixelShaderComputedDepthMode = prog_data->computed_depth_mode; + } if (params->src.enabled) wm.PixelShaderKillsPixel = true; @@ -1216,7 +1264,8 @@ blorp_emit_pipeline(struct blorp_batch *batch, uint32_t color_calc_state_offset; uint32_t depth_stencil_state_offset; - emit_urb_config(batch, params); + enum gen_urb_deref_block_size urb_deref_block_size; + emit_urb_config(batch, params, &urb_deref_block_size); if (params->wm_prog_data) { blend_state_offset = blorp_emit_blend_state(batch, params); @@ -1297,10 +1346,15 @@ blorp_emit_pipeline(struct blorp_batch *batch, clip.PerspectiveDivideDisable = true; } - blorp_emit_sf_config(batch, params); + blorp_emit_sf_config(batch, params, urb_deref_block_size); blorp_emit_ps_config(batch, params); blorp_emit_cc_viewport(batch); + +#if GEN_GEN >= 12 + /* Disable Primitive Replication. */ + blorp_emit(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr); +#endif } /******** This is the end of the pipeline setup code ********/ @@ -1700,14 +1754,38 @@ blorp_emit_gen8_hiz_op(struct blorp_batch *batch, blorp_emit_cc_viewport(batch); } - /* According to the SKL PRM formula for WM_INT::ThreadDispatchEnable, the - * 3DSTATE_WM::ForceThreadDispatchEnable field can force WM thread dispatch - * even when WM_HZ_OP is active. However, WM thread dispatch is normally - * disabled for HiZ ops and it appears that force-enabling it can lead to - * GPU hangs on at least Skylake. 
Since we don't know the current state of - * the 3DSTATE_WM packet, just emit a dummy one prior to 3DSTATE_WM_HZ_OP. - */ - blorp_emit(batch, GENX(3DSTATE_WM), wm); + if (GEN_GEN >= 12 && params->stencil.enabled && + params->hiz_op == ISL_AUX_OP_FULL_RESOLVE) { + /* GEN:BUG:1605967699 + * + * This workaround requires that the Force Thread Dispatch Enable flag + * be set to ForceOFF on the first WM_HZ_OP state cycle + * (followed by a CS Stall): + * + * "Workaround: There is a potential software workaround for the + * issue by doing these 2 steps 1) setting the force thread dispatch + * enable(bits 20:19) in the 3dstate_WM_body state to be set to + * Force_OFF (value of 1) along with the first WM_HZ_OP state cycle. + * The second WM_HZ_OP state which is required by programming + * sequencing to complete the HZ_OP operation can reprogram the + * 3dstate_WM_body to set to NORMAL(value of 0)." + */ + blorp_emit(batch, GENX(3DSTATE_WM), wm) { + wm.ForceThreadDispatchEnable = ForceOff; + } + blorp_emit(batch, GENX(PIPE_CONTROL), pipe) { + pipe.CommandStreamerStallEnable = true; + } + } else { + /* According to the SKL PRM formula for WM_INT::ThreadDispatchEnable, the + * 3DSTATE_WM::ForceThreadDispatchEnable field can force WM thread dispatch + * even when WM_HZ_OP is active. However, WM thread dispatch is normally + * disabled for HiZ ops and it appears that force-enabling it can lead to + * GPU hangs on at least Skylake. Since we don't know the current state of + * the 3DSTATE_WM packet, just emit a dummy one prior to 3DSTATE_WM_HZ_OP. + */ + blorp_emit(batch, GENX(3DSTATE_WM), wm); + } /* If we can't alter the depth stencil config and multiple layers are * involved, the HiZ op will fail.
This is because the op requires that a @@ -1732,7 +1810,7 @@ blorp_emit_gen8_hiz_op(struct blorp_batch *batch, hzp.DepthBufferResolveEnable = params->depth.enabled; #if GEN_GEN >= 12 if (params->stencil.enabled) { - assert(params->stencil.aux_usage == ISL_AUX_USAGE_CCS_E); + assert(params->stencil.aux_usage == ISL_AUX_USAGE_STC_CCS); hzp.StencilBufferResolveEnable = true; } #endif @@ -1769,6 +1847,18 @@ blorp_emit_gen8_hiz_op(struct blorp_batch *batch, pc.Address = blorp_get_workaround_page(batch); } + + if (GEN_GEN >= 12 && params->stencil.enabled && + params->hiz_op == ISL_AUX_OP_FULL_RESOLVE) { + /* GEN:BUG:1605967699 + * + * "The second WM_HZ_OP state which is required by programming + * sequencing to complete the HZ_OP operation can reprogram the + * 3dstate_WM_body to set to NORMAL(value of 0)." + */ + blorp_emit(batch, GENX(3DSTATE_WM), wm); + } + blorp_emit(batch, GENX(3DSTATE_WM_HZ_OP), hzp); } #endif