From aa64e24cb16cc63613ba99909059c1a0f7610ae2 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 19 Aug 2017 15:06:22 +0200 Subject: [PATCH] radeonsi/gfx9: don't flush L2 metadata for CB if not needed MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_blit.c | 8 ++++--- src/gallium/drivers/radeonsi/si_pipe.h | 23 ++++++++++++++++---- src/gallium/drivers/radeonsi/si_state.c | 13 +++++++---- src/gallium/drivers/radeonsi/si_state_draw.c | 11 +++++----- 4 files changed, 38 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 1b001cc04c8..5545f5b2a5b 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -413,7 +413,8 @@ si_decompress_depth(struct si_context *sctx, * The DB->CB copy uses CB for the final writes. */ if (copy_planes && tex->resource.b.b.nr_samples > 1) - si_make_CB_shader_coherent(sctx, tex->resource.b.b.nr_samples); + si_make_CB_shader_coherent(sctx, tex->resource.b.b.nr_samples, + false); } static void @@ -524,7 +525,8 @@ static void si_blit_decompress_color(struct pipe_context *ctx, } sctx->decompression_enabled = false; - si_make_CB_shader_coherent(sctx, rtex->resource.b.b.nr_samples); + si_make_CB_shader_coherent(sctx, rtex->resource.b.b.nr_samples, + vi_dcc_enabled(rtex, first_level)); } static void @@ -1213,7 +1215,7 @@ static void si_do_CB_resolve(struct si_context *sctx, si_blitter_end(&sctx->b.b); /* Flush caches for possible texturing. */ - si_make_CB_shader_coherent(sctx, 1); + si_make_CB_shader_coherent(sctx, 1, false); } static bool do_hardware_msaa_resolve(struct pipe_context *ctx, diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index f2a20ba4668..e5aeb9a7e88 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -58,7 +58,10 @@ /* Write dirty L2 lines back to memory (shader and CP DMA stores), but don't * invalidate L2. SI-CIK can't do it, so they will do complete invalidation. */ #define SI_CONTEXT_WRITEBACK_GLOBAL_L2 (R600_CONTEXT_PRIVATE_FLAG << 4) -/* gaps */ +/* Writeback & invalidate the L2 metadata cache. It can only be coupled with + * a CB or DB flush. */ +#define SI_CONTEXT_INV_L2_METADATA (R600_CONTEXT_PRIVATE_FLAG << 5) +/* gap */ /* Framebuffer caches. */ #define SI_CONTEXT_FLUSH_AND_INV_DB (R600_CONTEXT_PRIVATE_FLAG << 7) #define SI_CONTEXT_FLUSH_AND_INV_CB (R600_CONTEXT_PRIVATE_FLAG << 8) @@ -198,6 +201,7 @@ struct si_framebuffer { ubyte dirty_cbufs; bool dirty_zsbuf; bool any_dst_linear; + bool CB_has_shader_readable_metadata; }; struct si_clip_state { @@ -612,14 +616,25 @@ si_saved_cs_reference(struct si_saved_cs **dst, struct si_saved_cs *src) } static inline void -si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples) +si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples, + bool shaders_read_metadata) { sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_INV_VMEM_L1; - /* Single-sample color is coherent with shaders on GFX9. */ - if (sctx->b.chip_class <= VI || num_samples >= 2) + if (sctx->b.chip_class >= GFX9) { + /* Single-sample color is coherent with shaders on GFX9, but + * L2 metadata must be flushed if shaders read metadata. + * (DCC, CMASK). + */ + if (num_samples >= 2) + sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2; + else if (shaders_read_metadata) + sctx->b.flags |= SI_CONTEXT_INV_L2_METADATA; + } else { + /* SI-CI-VI */ sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2; + } } static inline void diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index d116c07ee6e..fb97052aacf 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2573,7 +2573,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, */ if (sctx->framebuffer.nr_samples <= 1 && sctx->framebuffer.state.nr_cbufs) - si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples); + si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples, + sctx->framebuffer.CB_has_shader_readable_metadata); sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH; @@ -2608,6 +2609,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples); sctx->framebuffer.any_dst_linear = false; + sctx->framebuffer.CB_has_shader_readable_metadata = false; for (i = 0; i < state->nr_cbufs; i++) { if (!state->cbufs[i]) @@ -2642,6 +2644,9 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, if (rtex->surface.is_linear) sctx->framebuffer.any_dst_linear = true; + if (vi_dcc_enabled(rtex, surf->base.u.tex.level)) + sctx->framebuffer.CB_has_shader_readable_metadata = true; + r600_context_add_resource_size(ctx, surf->base.texture); p_atomic_inc(&rtex->framebuffers_bound); @@ -4022,7 +4027,8 @@ static void si_texture_barrier(struct pipe_context *ctx, unsigned flags) /* Multisample surfaces are flushed in si_decompress_textures. */ if (sctx->framebuffer.nr_samples <= 1 && sctx->framebuffer.state.nr_cbufs) - si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples); + si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples, + sctx->framebuffer.CB_has_shader_readable_metadata); } /* This only ensures coherency for shader image/buffer stores. */ @@ -4067,8 +4073,7 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) sctx->framebuffer.state.nr_cbufs) { sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB; - /* Single-sample color is coherent with TC on GFX9. */ - if (sctx->screen->b.chip_class <= VI) + if (sctx->b.chip_class <= VI) sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; } diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 7bc52f29e25..1d8be49a480 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -988,13 +988,12 @@ void si_emit_cache_flush(struct si_context *sctx) * TC | TC_MD = writeback & invalidate L2 metadata (DCC, etc.) * TCL1 = invalidate L1 */ + tc_flags = 0; - /* When flushing CB or DB, L2 metadata should always be invali- - * dated before texturing. Invalidating L2 data is not needed - * in some cases. - */ - tc_flags = EVENT_TC_ACTION_ENA | - EVENT_TC_MD_ACTION_ENA; + if (rctx->flags & SI_CONTEXT_INV_L2_METADATA) { + tc_flags = EVENT_TC_ACTION_ENA | + EVENT_TC_MD_ACTION_ENA; + } /* Ideally flush TC together with CB/DB. */ if (rctx->flags & SI_CONTEXT_INV_GLOBAL_L2) { -- 2.30.2