I haven't measured the performance impact of this change, but it can only help.
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4866>
tex->stencil_dirty_level_mask &= ~levels_s;
}
+ /* We just had to completely decompress Z/S for texturing. Enable
+ * TC-compatible HTILE on the next clear, so that the decompression
+ * doesn't have to be done for this texture ever again.
+ *
+ * TC-compatible HTILE might slightly reduce Z/S performance, but
+ * the cost of repeated decompression is much worse.
+ */
+ if (has_htile && !tc_compat_htile &&
+ tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE)
+ tex->enable_tc_compatible_htile_next_clear = true;
+
/* DB caches only need to be flushed for in-place decompression, or
* when we don't decompress but TC-compatible planes are dirty.
*/
if (zstex && zsbuf->u.tex.first_layer == 0 &&
zsbuf->u.tex.last_layer == util_max_layer(&zstex->buffer.b.b, 0)) {
+ /* See whether we should enable TC-compatible HTILE. */
+ if (zstex->enable_tc_compatible_htile_next_clear &&
+ !zstex->tc_compatible_htile &&
+ si_htile_enabled(zstex, zsbuf->u.tex.level, PIPE_MASK_ZS) &&
+ /* If both depth and stencil are present, they must be cleared together. */
+ ((buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL ||
+ (buffers & PIPE_CLEAR_DEPTH && (!zstex->surface.has_stencil ||
+ zstex->htile_stencil_disabled)))) {
+ /* Enable TC-compatible HTILE. */
+ zstex->enable_tc_compatible_htile_next_clear = false;
+ zstex->tc_compatible_htile = true;
+
+ /* Update the framebuffer state to reflect the change. */
+ sctx->framebuffer.DB_has_shader_readable_metadata = true;
+ sctx->framebuffer.dirty_zsbuf = true;
+ si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
+
+ /* Update all sampler views and shader images in all contexts. */
+ p_atomic_inc(&sctx->screen->dirty_tex_counter);
+
+ /* Re-initialize HTILE, so that it doesn't contain values incompatible
+ * with the new TC-compatible HTILE setting.
+ *
+ * 0xfffff30f = uncompressed Z + S
+ * 0xfffc000f = uncompressed Z only
+ *
+ * GFX8 always uses the Z+S HTILE format for TC-compatible HTILE even
+ * when stencil is not present.
+ */
+ uint32_t clear_value = (zstex->surface.has_stencil &&
+ !zstex->htile_stencil_disabled) ||
+ sctx->chip_class == GFX8 ? 0xfffff30f : 0xfffc000f;
+ si_clear_buffer(sctx, &zstex->buffer.b.b, zstex->surface.htile_offset,
+ zstex->surface.htile_size, &clear_value, 4,
+ SI_COHERENCY_DB_META, false);
+ }
+
/* TC-compatible HTILE only supports depth clears to 0 or 1. */
if (buffers & PIPE_CLEAR_DEPTH && si_htile_enabled(zstex, zsbuf->u.tex.level, PIPE_MASK_Z) &&
(!zstex->tc_compatible_htile || depth == 0 || depth == 1)) {
static enum si_cache_policy get_cache_policy(struct si_context *sctx, enum si_coherency coher,
uint64_t size)
{
- if ((sctx->chip_class >= GFX9 && (coher == SI_COHERENCY_CB_META || coher == SI_COHERENCY_CP)) ||
+ if ((sctx->chip_class >= GFX9 && (coher == SI_COHERENCY_CB_META ||
+ coher == SI_COHERENCY_DB_META ||
+ coher == SI_COHERENCY_CP)) ||
(sctx->chip_class >= GFX7 && coher == SI_COHERENCY_SHADER))
return size <= 256 * 1024 ? L2_LRU : L2_STREAM;
(cache_policy == L2_BYPASS ? SI_CONTEXT_INV_L2 : 0);
case SI_COHERENCY_CB_META:
return SI_CONTEXT_FLUSH_AND_INV_CB;
+ case SI_COHERENCY_DB_META:
+ return SI_CONTEXT_FLUSH_AND_INV_DB;
}
}
SI_COHERENCY_NONE, /* no cache flushes needed */
SI_COHERENCY_SHADER,
SI_COHERENCY_CB_META,
+ SI_COHERENCY_DB_META,
SI_COHERENCY_CP,
};
uint8_t stencil_clear_value;
bool fmask_is_identity : 1;
bool tc_compatible_htile : 1;
+ bool enable_tc_compatible_htile_next_clear : 1;
bool htile_stencil_disabled : 1;
bool depth_cleared : 1; /* if it was cleared at least once */
bool stencil_cleared : 1; /* if it was cleared at least once */
/* don't include stencil-only formats which we don't support for rendering */
tex->is_depth = util_format_has_depth(util_format_description(tex->buffer.b.b.format));
tex->surface = *surface;
- tex->tc_compatible_htile =
- tex->surface.htile_size != 0 && (tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE);
+ tex->tc_compatible_htile = false; /* This will be enabled on demand. */
/* TC-compatible HTILE:
* - GFX8 only supports Z32_FLOAT.
* - GFX9 only supports Z32_FLOAT and Z16_UNORM. */
- if (tex->tc_compatible_htile) {
+ if (tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE) {
if (sscreen->info.chip_class >= GFX9 && base->format == PIPE_FORMAT_Z16_UNORM)
tex->db_render_format = base->format;
else {