From: Marek Olšák Date: Sat, 19 Aug 2017 13:06:22 +0000 (+0200) Subject: radeonsi/gfx9: don't flush L2 metadata for DB if not needed X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=7dec48b81e88dc933a23d12d3a71a4c6b9f216e4;p=mesa.git radeonsi/gfx9: don't flush L2 metadata for DB if not needed Reviewed-by: Nicolai Hähnle --- diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 5545f5b2a5b..4e3b707b6cf 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -392,21 +392,19 @@ si_decompress_depth(struct si_context *sctx, * when we don't decompress but TC-compatible planes are dirty. */ si_make_DB_shader_coherent(sctx, tex->resource.b.b.nr_samples, - inplace_planes & PIPE_MASK_S); + inplace_planes & PIPE_MASK_S, + tex->tc_compatible_htile && + first_level == 0); - /* If we flush DB caches for TC-compatible depth, the dirty - * state becomes 0 for the whole mipmap tree and all planes. - * (there is nothing else to flush) - */ if (tex->tc_compatible_htile) { /* Only clear the mask that we are flushing, because * si_make_DB_shader_coherent() can treat depth and * stencil differently. */ if (inplace_planes & PIPE_MASK_Z) - tex->dirty_level_mask = 0; + tex->dirty_level_mask &= ~levels_z; if (inplace_planes & PIPE_MASK_S) - tex->stencil_dirty_level_mask = 0; + tex->stencil_dirty_level_mask &= ~levels_s; } } /* set_framebuffer_state takes care of coherency for single-sample. diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index e5aeb9a7e88..386a6dc886d 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -202,6 +202,7 @@ struct si_framebuffer { bool dirty_zsbuf; bool any_dst_linear; bool CB_has_shader_readable_metadata; + bool DB_has_shader_readable_metadata; }; struct si_clip_state { @@ -639,14 +640,24 @@ si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples, static inline void si_make_DB_shader_coherent(struct si_context *sctx, unsigned num_samples, - bool include_stencil) + bool include_stencil, bool shaders_read_metadata) { sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB | SI_CONTEXT_INV_VMEM_L1; - /* Single-sample depth (not stencil) is coherent with shaders on GFX9. */ - if (sctx->b.chip_class <= VI || num_samples >= 2 || include_stencil) + if (sctx->b.chip_class >= GFX9) { + /* Single-sample depth (not stencil) is coherent with shaders + * on GFX9, but L2 metadata must be flushed if shaders read + * metadata. + */ + if (num_samples >= 2 || include_stencil) + sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2; + else if (shaders_read_metadata) + sctx->b.flags |= SI_CONTEXT_INV_L2_METADATA; + } else { + /* SI-CI-VI */ sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2; + } } #endif diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index fb97052aacf..d391d5f501a 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2585,7 +2585,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, * Note that lower mipmap levels aren't compressed. */ if (sctx->generate_mipmap_for_depth) - si_make_DB_shader_coherent(sctx, 1, false); + si_make_DB_shader_coherent(sctx, 1, false, + sctx->framebuffer.DB_has_shader_readable_metadata); /* Take the maximum of the old and new count. If the new count is lower, * dirtying is needed to disable the unbound colorbuffers. @@ -2610,6 +2611,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples); sctx->framebuffer.any_dst_linear = false; sctx->framebuffer.CB_has_shader_readable_metadata = false; + sctx->framebuffer.DB_has_shader_readable_metadata = false; for (i = 0; i < state->nr_cbufs; i++) { if (!state->cbufs[i]) @@ -2665,6 +2667,10 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, if (!surf->depth_initialized) { si_init_depth_surface(sctx, surf); } + + if (rtex->tc_compatible_htile && !surf->base.u.tex.level) + sctx->framebuffer.DB_has_shader_readable_metadata = true; + r600_context_add_resource_size(ctx, surf->base.texture); }