From 98dc7f56b7d17cd56ab43768058a8d9c5a8f2e0f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 3 Mar 2020 17:03:41 -0600 Subject: [PATCH] intel/isl: Add a separate ISL_AUX_USAGE_HIZ_CCS_WT This is distinct from ISL_AUX_USAGE_HIZ_CCS in that the HiZ surface operates in write-through mode which means that the HiZ surface is only used for depth-testing acceleration and the CCS-compressed main surface is always valid so we can texture from it. Separating full HiZ from write-through mode at the isl_aux_usage level has a couple of advantages: 1. It's more explicit. Instead of write-through mode depending on the heuristic decision in isl_surf_supports_hiz_ccs_wt, it's now something that's explicitly requested by the driver. This should be more robust than hoping isl_surf_supports_hiz_ccs_wt always returns the same thing every time. If someone (say BLORP) ever drops a usage flag on the isl_surf, there's a chance it could return a different value without us noticing leading to corruptions. 2. Because ISL_AUX_USAGE_HIZ_CCS_WT is it's own isl_aux_usage flag, we can say inside the driver that HIZ_CCS does not support sampling but HIZ_CCS_WT does. We can also pass HIZ_CCS_WT to isl_surf_fill_state and it can do some validation for us beyond what we would be able to do if we conflate HIZ_CCS_WT and CCS_E. 3. In the future, we can add new heuristics to the driver which do things such as start all depth surfaces (regardless of usage flags) off in HIZ_CCS and then do a full resolve and drop to HIZ_CCS_WT the first time it gets used by the sampler. This would potentially let us enable the faster HIZ_CCS mode even in cases where it technically comes in through the API as a texture. Reviewed-by: Nanley Chery Part-of: --- src/intel/isl/isl.h | 27 +++++++++++++++- src/intel/isl/isl_aux_info.c | 1 + src/intel/isl/isl_emit_depth_stencil.c | 43 ++++++++++++++++++++++---- src/intel/isl/isl_surface_state.c | 27 ++++++++++++++-- 4 files changed, 89 insertions(+), 9 deletions(-) diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h index 871e35b4cb8..fbf31b19d00 100644 --- a/src/intel/isl/isl.h +++ b/src/intel/isl/isl.h @@ -617,7 +617,30 @@ enum isl_aux_usage { */ ISL_AUX_USAGE_MC, - /** The auxiliary surface is a HiZ surface and CCS is also enabled */ + /** The auxiliary surface is a HiZ surface operating in write-through mode + * and CCS is also enabled + * + * In this mode, the HiZ and CCS surfaces act as a single fused compression + * surface where resolves and ambiguates operate on both surfaces at the + * same time. In this mode, the HiZ surface operates in write-through + * mode where it is only used for accelerating depth testing and not for + * actual compression. The CCS-compressed surface contains valid data at + * all times. + * + * @invariant isl_surf::samples == 1 + */ + ISL_AUX_USAGE_HIZ_CCS_WT, + + /** The auxiliary surface is a HiZ surface with and CCS is also enabled + * + * In this mode, the HiZ and CCS surfaces act as a single fused compression + * surface where resolves and ambiguates operate on both surfaces at the + * same time. In this mode, full HiZ compression is enabled and the + * CCS-compressed main surface may not contain valid data. The only way to + * read the surface outside of the depth hardware is to do a full resolve + * which resolves both HiZ and CCS so the surface is in the pass-through + * state. + */ ISL_AUX_USAGE_HIZ_CCS, /** The auxiliary surface is an MCS and CCS is also enabled @@ -1737,6 +1760,7 @@ static inline bool isl_aux_usage_has_hiz(enum isl_aux_usage usage) { return usage == ISL_AUX_USAGE_HIZ || + usage == ISL_AUX_USAGE_HIZ_CCS_WT || usage == ISL_AUX_USAGE_HIZ_CCS; } @@ -1753,6 +1777,7 @@ isl_aux_usage_has_ccs(enum isl_aux_usage usage) return usage == ISL_AUX_USAGE_CCS_D || usage == ISL_AUX_USAGE_CCS_E || usage == ISL_AUX_USAGE_MC || + usage == ISL_AUX_USAGE_HIZ_CCS_WT || usage == ISL_AUX_USAGE_HIZ_CCS || usage == ISL_AUX_USAGE_MCS_CCS; } diff --git a/src/intel/isl/isl_aux_info.c b/src/intel/isl/isl_aux_info.c index 4fe6e516fc3..556827b97bf 100644 --- a/src/intel/isl/isl_aux_info.c +++ b/src/intel/isl/isl_aux_info.c @@ -65,6 +65,7 @@ static const struct aux_usage_info info[] = { /* write_behavior c fc pr fra */ AUX( COMPRESS, Y, Y, x, x, HIZ) AUX( COMPRESS, Y, Y, x, x, HIZ_CCS) + AUX( COMPRESS, Y, Y, x, x, HIZ_CCS_WT) AUX( COMPRESS, Y, Y, Y, x, MCS) AUX( COMPRESS, Y, Y, Y, x, MCS_CCS) AUX( COMPRESS, Y, Y, Y, Y, CCS_E) diff --git a/src/intel/isl/isl_emit_depth_stencil.c b/src/intel/isl/isl_emit_depth_stencil.c index c62639955eb..1ca40f63e48 100644 --- a/src/intel/isl/isl_emit_depth_stencil.c +++ b/src/intel/isl/isl_emit_depth_stencil.c @@ -130,7 +130,7 @@ isl_genX(emit_depth_stencil_hiz_s)(const struct isl_device *dev, void *batch, #if GEN_GEN >= 12 db.ControlSurfaceEnable = db.DepthBufferCompressionEnable = - info->hiz_usage == ISL_AUX_USAGE_HIZ_CCS; + isl_aux_usage_has_ccs(info->hiz_usage); #endif } @@ -203,10 +203,8 @@ isl_genX(emit_depth_stencil_hiz_s)(const struct isl_device *dev, void *batch, }; assert(info->hiz_usage == ISL_AUX_USAGE_NONE || - info->hiz_usage == ISL_AUX_USAGE_HIZ || - info->hiz_usage == ISL_AUX_USAGE_HIZ_CCS); - if (info->hiz_usage == ISL_AUX_USAGE_HIZ || - info->hiz_usage == ISL_AUX_USAGE_HIZ_CCS) { + isl_aux_usage_has_hiz(info->hiz_usage)); + if (isl_aux_usage_has_hiz(info->hiz_usage)) { assert(GEN_GEN >= 12 || info->hiz_usage == ISL_AUX_USAGE_HIZ); db.HierarchicalDepthBufferEnable = true; @@ -216,7 +214,40 @@ isl_genX(emit_depth_stencil_hiz_s)(const struct isl_device *dev, void *batch, #if GEN_GEN >= 12 hiz.HierarchicalDepthBufferWriteThruEnable = isl_surf_supports_hiz_ccs_wt(dev->info, info->depth_surf, - info->hiz_usage); + info->hiz_usage) || + info->hiz_usage == ISL_AUX_USAGE_HIZ_CCS_WT; + + /* The bspec docs for this bit are fairly unclear about exactly what is + * and isn't supported with HiZ write-through. It's fairly clear that + * you can't sample from a multisampled depth buffer with CCS. This + * limitation isn't called out explicitly but the docs for the CCS_E + * value of RENDER_SURFACE_STATE::AuxiliarySurfaceMode say: + * + * "If Number of multisamples > 1, programming this value means MSAA + * compression is enabled for that surface. Auxillary surface is MSC + * with tile y." + * + * Since this interpretation ignores whether the surface is + * depth/stencil or not and since multisampled depth buffers use + * ISL_MSAA_LAYOUT_INTERLEAVED which is incompatible with MCS + * compression, this means that we can't even specify MSAA depth CCS in + * RENDER_SURFACE_STATE::AuxiliarySurfaceMode. The BSpec also says, for + * 3DSTATE_HIER_DEPTH_BUFFER::HierarchicalDepthBufferWriteThruEnable, + * + * "This bit must NOT be set for >1x MSAA modes, since sampler + * doesn't support sampling from >1x MSAA depth buffer." + * + * Again, this is all focused around what the sampler can do and not + * what the depth hardware can do. + * + * Reading even more internal docs which can't be quoted here makes it + * pretty clear that, even if it's not currently called out in the + * BSpec, HiZ+CCS write-through isn't intended to work with MSAA and we + * shouldn't try to use it. Treat it as if it's disallowed even if the + * BSpec doesn't explicitly document that. + */ + if (hiz.HierarchicalDepthBufferWriteThruEnable) + assert(info->depth_surf->samples == 1); #endif #if GEN_GEN >= 8 diff --git a/src/intel/isl/isl_surface_state.c b/src/intel/isl/isl_surface_state.c index e44e16c87fa..4b8592513d4 100644 --- a/src/intel/isl/isl_surface_state.c +++ b/src/intel/isl/isl_surface_state.c @@ -91,6 +91,7 @@ static const uint32_t isl_to_gen_aux_mode[] = { [ISL_AUX_USAGE_NONE] = AUX_NONE, [ISL_AUX_USAGE_MCS] = AUX_CCS_E, [ISL_AUX_USAGE_CCS_E] = AUX_CCS_E, + [ISL_AUX_USAGE_HIZ_CCS_WT] = AUX_CCS_E, [ISL_AUX_USAGE_MCS_CCS] = AUX_MCS_LCE, }; #elif GEN_GEN >= 9 @@ -559,6 +560,7 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, if (GEN_GEN >= 12) { assert(info->aux_usage == ISL_AUX_USAGE_MCS || info->aux_usage == ISL_AUX_USAGE_CCS_E || + info->aux_usage == ISL_AUX_USAGE_HIZ_CCS_WT || info->aux_usage == ISL_AUX_USAGE_MCS_CCS); } else if (GEN_GEN >= 9) { assert(info->aux_usage == ISL_AUX_USAGE_HIZ || @@ -580,8 +582,29 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, */ assert(!(info->view->usage & ISL_SURF_USAGE_STORAGE_BIT)); - if (info->aux_usage == ISL_AUX_USAGE_HIZ) { - /* The number of samples must be 1 */ + if (isl_aux_usage_has_hiz(info->aux_usage)) { + /* For Gen8-10, there are some restrictions around sampling from HiZ. + * The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode + * say: + * + * "If this field is set to AUX_HIZ, Number of Multisamples must + * be MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D." + * + * On Gen12, the docs are a bit less obvious but the restriction is + * the same. The limitation isn't called out explicitly but the docs + * for the CCS_E value of RENDER_SURFACE_STATE::AuxiliarySurfaceMode + * say: + * + * "If Number of multisamples > 1, programming this value means + * MSAA compression is enabled for that surface. Auxillary surface + * is MSC with tile y." + * + * Since this interpretation ignores whether the surface is + * depth/stencil or not and since multisampled depth buffers use + * ISL_MSAA_LAYOUT_INTERLEAVED which is incompatible with MCS + * compression, this means that we can't even specify MSAA depth CCS + * in RENDER_SURFACE_STATE::AuxiliarySurfaceMode. + */ assert(info->surf->samples == 1); /* The dimension must not be 3D */ -- 2.30.2